xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
3; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
7; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
8; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
9
; Per-work-item case: every work-item loads its own generic pointer from the
; global array %ptr.ptr (indexed by workitem id x), queries
; llvm.amdgcn.is.private on it, and stores the zero-extended i1 result.
; In the expected output below the query becomes a v_cmp_eq_u32 on the high
; dword (v1) of the loaded 64-bit pointer followed by a v_cndmask_b32 that
; produces 0 or 1. The tahiti and hawaii runs compare against an SGPR filled by
; an s_load_dword at offset 0x32 from s[8:9]; the gfx900, gfx1010 and gfx1100
; runs compare against the high half of src_private_base.
10define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
11; SI-LABEL: is_private_vgpr:
12; SI:       ; %bb.0:
13; SI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
14; SI-NEXT:    s_load_dword s4, s[8:9], 0x32
15; SI-NEXT:    s_mov_b32 s2, 0
16; SI-NEXT:    s_mov_b32 s3, 0x100f000
17; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
18; SI-NEXT:    v_mov_b32_e32 v1, 0
19; SI-NEXT:    s_waitcnt lgkmcnt(0)
20; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
21; SI-NEXT:    s_waitcnt vmcnt(0)
22; SI-NEXT:    s_mov_b32 s2, -1
23; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
24; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
25; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
26; SI-NEXT:    s_endpgm
27;
28; CI-SDAG-LABEL: is_private_vgpr:
29; CI-SDAG:       ; %bb.0:
30; CI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
31; CI-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x32
32; CI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
33; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
34; CI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
35; CI-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
36; CI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
37; CI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
38; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
39; CI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
40; CI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
41; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
42; CI-SDAG-NEXT:    s_endpgm
43;
44; GFX9-LABEL: is_private_vgpr:
45; GFX9:       ; %bb.0:
46; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
47; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
48; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
49; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
50; GFX9-NEXT:    s_waitcnt vmcnt(0)
51; GFX9-NEXT:    s_mov_b64 s[0:1], src_private_base
52; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
53; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
54; GFX9-NEXT:    global_store_dword v[0:1], v0, off
55; GFX9-NEXT:    s_endpgm
56;
57; CI-GISEL-LABEL: is_private_vgpr:
58; CI-GISEL:       ; %bb.0:
59; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
60; CI-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x32
61; CI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
62; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
63; CI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
64; CI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
65; CI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
66; CI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
67; CI-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
68; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
69; CI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
70; CI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
71; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
72; CI-GISEL-NEXT:    s_endpgm
73;
74; GFX10-LABEL: is_private_vgpr:
75; GFX10:       ; %bb.0:
76; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
77; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
78; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
79; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
80; GFX10-NEXT:    s_waitcnt vmcnt(0)
81; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
82; GFX10-NEXT:    s_mov_b64 s[0:1], src_private_base
83; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
84; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
85; GFX10-NEXT:    global_store_dword v[0:1], v0, off
86; GFX10-NEXT:    s_endpgm
87;
88; GFX11-LABEL: is_private_vgpr:
89; GFX11:       ; %bb.0:
90; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
91; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
92; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
93; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
94; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
95; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
96; GFX11-NEXT:    s_waitcnt vmcnt(0)
97; GFX11-NEXT:    s_mov_b64 s[0:1], src_private_base
98; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
99; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
100; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
101; GFX11-NEXT:    s_endpgm
  ; Use the work-item's x id as the array index so each work-item reads its own
  ; element of %ptr.ptr.
102  %id = call i32 @llvm.amdgcn.workitem.id.x()
103  %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
  ; The pointer under test is loaded with a volatile load (shows up as the
  ; glc-flagged 64-bit loads in the expected output above).
104  %ptr = load volatile ptr, ptr addrspace(1) %gep
105  %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
  ; Widen the i1 and store it. The destination address is undef, so only the
  ; stored value is meaningful; presumably the store exists just to keep the
  ; query result live -- confirm before changing.
106  %ext = zext i1 %val to i32
107  store i32 %ext, ptr addrspace(1) undef
108  ret void
109}
110
111; FIXME: (setcc (zero_extend (setcc)), 1) is not folded out, resulting in a
112; select and vcc branch.
; Kernel-argument case: %ptr comes straight from the kernel arguments and the
; llvm.amdgcn.is.private result drives a conditional branch.
; The global-isel=0 outputs below show the pattern the FIXME refers to:
; s_cmp_eq_u32, then s_cselect_b64 and s_andn2_b64 into vcc, then
; s_cbranch_vccnz. The global-isel=1 outputs branch on SCC directly
; (s_cmp_lg_u32 followed by s_cbranch_scc1).
113define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
114; SI-LABEL: is_private_sgpr:
115; SI:       ; %bb.0:
116; SI-NEXT:    s_load_dword s0, s[8:9], 0x1
117; SI-NEXT:    s_load_dword s1, s[8:9], 0x32
118; SI-NEXT:    s_waitcnt lgkmcnt(0)
119; SI-NEXT:    s_cmp_eq_u32 s0, s1
120; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
121; SI-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
122; SI-NEXT:    s_cbranch_vccnz .LBB1_2
123; SI-NEXT:  ; %bb.1: ; %bb0
124; SI-NEXT:    s_mov_b32 s3, 0x100f000
125; SI-NEXT:    s_mov_b32 s2, -1
126; SI-NEXT:    v_mov_b32_e32 v0, 0
127; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
128; SI-NEXT:    s_waitcnt vmcnt(0)
129; SI-NEXT:  .LBB1_2: ; %bb1
130; SI-NEXT:    s_endpgm
131;
132; CI-SDAG-LABEL: is_private_sgpr:
133; CI-SDAG:       ; %bb.0:
134; CI-SDAG-NEXT:    s_load_dword s0, s[8:9], 0x1
135; CI-SDAG-NEXT:    s_load_dword s1, s[8:9], 0x32
136; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
137; CI-SDAG-NEXT:    s_cmp_eq_u32 s0, s1
138; CI-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
139; CI-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
140; CI-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
141; CI-SDAG-NEXT:  ; %bb.1: ; %bb0
142; CI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
143; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
144; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
145; CI-SDAG-NEXT:  .LBB1_2: ; %bb1
146; CI-SDAG-NEXT:    s_endpgm
147;
148; GFX9-SDAG-LABEL: is_private_sgpr:
149; GFX9-SDAG:       ; %bb.0:
150; GFX9-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x4
151; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], src_private_base
152; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
153; GFX9-SDAG-NEXT:    s_cmp_eq_u32 s2, s1
154; GFX9-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
155; GFX9-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
156; GFX9-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
157; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb0
158; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
159; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v0, off
160; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
161; GFX9-SDAG-NEXT:  .LBB1_2: ; %bb1
162; GFX9-SDAG-NEXT:    s_endpgm
163;
164; CI-GISEL-LABEL: is_private_sgpr:
165; CI-GISEL:       ; %bb.0:
166; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
167; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
168; CI-GISEL-NEXT:    s_load_dword s0, s[8:9], 0x32
169; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
170; CI-GISEL-NEXT:    s_cmp_lg_u32 s1, s0
171; CI-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
172; CI-GISEL-NEXT:  ; %bb.1: ; %bb0
173; CI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
174; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
175; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
176; CI-GISEL-NEXT:  .LBB1_2: ; %bb1
177; CI-GISEL-NEXT:    s_endpgm
178;
179; GFX9-GISEL-LABEL: is_private_sgpr:
180; GFX9-GISEL:       ; %bb.0:
181; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
182; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], src_private_base
183; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
184; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s1, s3
185; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
186; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb0
187; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
188; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
189; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
190; GFX9-GISEL-NEXT:  .LBB1_2: ; %bb1
191; GFX9-GISEL-NEXT:    s_endpgm
192;
193; GFX10-LABEL: is_private_sgpr:
194; GFX10:       ; %bb.0:
195; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
196; GFX10-NEXT:    s_mov_b64 s[2:3], src_private_base
197; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
198; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
199; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
200; GFX10-NEXT:  ; %bb.1: ; %bb0
201; GFX10-NEXT:    v_mov_b32_e32 v0, 0
202; GFX10-NEXT:    global_store_dword v[0:1], v0, off
203; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
204; GFX10-NEXT:  .LBB1_2: ; %bb1
205; GFX10-NEXT:    s_endpgm
206;
207; GFX11-LABEL: is_private_sgpr:
208; GFX11:       ; %bb.0:
209; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
210; GFX11-NEXT:    s_mov_b64 s[2:3], src_private_base
211; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
212; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
213; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
214; GFX11-NEXT:  ; %bb.1: ; %bb0
215; GFX11-NEXT:    v_mov_b32_e32 v0, 0
216; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
217; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
218; GFX11-NEXT:  .LBB1_2: ; %bb1
219; GFX11-NEXT:    s_endpgm
  ; Branch on the query result: bb0 is entered only when the intrinsic
  ; returns true, otherwise control goes straight to bb1.
220  %val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
221  br i1 %val, label %bb0, label %bb1
222
223bb0:
  ; Volatile store to an undef address; it gives the taken path an observable
  ; effect (the store plus the wait that follows it in the expected output).
224  store volatile i32 0, ptr addrspace(1) undef
225  br label %bb1
226
227bb1:
228  ret void
229}
230
; Module-level flag setting "amdhsa_code_object_version" to 500 for every kernel
; in this file (presumably code object v5 -- confirm against the AMDGPU usage
; guide). The leading i32 1 is the module-flag merge behavior; per the LLVM
; LangRef that value means conflicting definitions are an error.
231!llvm.module.flags = !{!0}
232!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
233;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
234; CI: {{.*}}
235; GFX10-GISEL: {{.*}}
236; GFX11-GISEL: {{.*}}
237; SI-SDAG: {{.*}}
238