xref: /llvm-project/llvm/test/CodeGen/AMDGPU/s-barrier.ll (revision 3def49cb64ec1298290724081bd37dbdeb2ea5f8)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
4
; Three internal addrspace(3) globals of the target extension type
; "amdgcn.named.barrier", all initialized to poison. @bar is used by the two
; kernels, @bar2 by func2 and @bar3 by func1.
5@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
6@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
7@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
8
; func1: non-kernel function that signals named barrier @bar3 with an i32
; operand of 7, joins @bar3 and then waits with an i16 operand of 1.
; In the expected output below, m0 is loaded with 0x70003 before
; s_barrier_signal; the SelectionDAG expectations join through m0 = 3 while
; the GlobalISel expectations join with the immediate 3.
; NOTE(review): 0x70003 equals (7 << 16) | 3, presumably the i32 operand placed
; above a barrier id of 3 - confirm against the ISA documentation.
9define void @func1() {
10; GFX12-SDAG-LABEL: func1:
11; GFX12-SDAG:       ; %bb.0:
12; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
13; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
14; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
15; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
16; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
17; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70003
18; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
19; GFX12-SDAG-NEXT:    s_barrier_signal m0
20; GFX12-SDAG-NEXT:    s_mov_b32 m0, 3
21; GFX12-SDAG-NEXT:    s_barrier_join m0
22; GFX12-SDAG-NEXT:    s_barrier_wait 1
23; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
24; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
25;
26; GFX12-GISEL-LABEL: func1:
27; GFX12-GISEL:       ; %bb.0:
28; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
29; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
30; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
31; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
32; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
33; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70003
34; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
35; GFX12-GISEL-NEXT:    s_barrier_signal m0
36; GFX12-GISEL-NEXT:    s_barrier_join 3
37; GFX12-GISEL-NEXT:    s_barrier_wait 1
38; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
39; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
    ; Signal, join and wait, all on the statically named barrier @bar3.
40    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
41    call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
42    call void @llvm.amdgcn.s.barrier.wait(i16 1)
43    ret void
44}
45
; func2: same shape as func1 but operating on named barrier @bar2: signal with
; an i32 operand of 7, join, then wait with an i16 operand of 1.
; In the expected output below, m0 is loaded with 0x70001 before
; s_barrier_signal and the join uses the value 1 (through m0 for SelectionDAG,
; as an immediate for GlobalISel).
; NOTE(review): 0x70001 equals (7 << 16) | 1, presumably the i32 operand placed
; above a barrier id of 1 - confirm against the ISA documentation.
46define void @func2() {
47; GFX12-SDAG-LABEL: func2:
48; GFX12-SDAG:       ; %bb.0:
49; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
50; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
51; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
52; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
53; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
54; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70001
55; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
56; GFX12-SDAG-NEXT:    s_barrier_signal m0
57; GFX12-SDAG-NEXT:    s_mov_b32 m0, 1
58; GFX12-SDAG-NEXT:    s_barrier_join m0
59; GFX12-SDAG-NEXT:    s_barrier_wait 1
60; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
61; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
62;
63; GFX12-GISEL-LABEL: func2:
64; GFX12-GISEL:       ; %bb.0:
65; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
66; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
67; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
68; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
69; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
70; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70001
71; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
72; GFX12-GISEL-NEXT:    s_barrier_signal m0
73; GFX12-GISEL-NEXT:    s_barrier_join 1
74; GFX12-GISEL-NEXT:    s_barrier_wait 1
75; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
76; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
    ; Signal, join and wait, all on the statically named barrier @bar2.
77    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
78    call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
79    call void @llvm.amdgcn.s.barrier.wait(i16 1)
80    ret void
81}
82
; kernel1: kernel that exercises every barrier intrinsic declared in this file,
; once with the statically named barrier @bar and once with a barrier pointer
; received in the %in argument, and then calls func1 and func2.
; The %out argument and the %isfirst/%state/%state2/%state3 results are not
; used by any instruction in the body.
; In the expected output below, @bar with i32 12 shows up as m0 = 0xc0002. For
; %in the expected code loads a 32-bit value from offset 0x2c, shifts it right
; by 4, masks it with 63 and ORs in 0x90000 before moving it to m0.
; NOTE(review): the constants look like (operand << 16) | barrier id, with
; @bar mapped to id 2 - confirm against the ISA documentation.
83define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
84; GFX12-SDAG-LABEL: kernel1:
85; GFX12-SDAG:       ; %bb.0:
86; GFX12-SDAG-NEXT:    s_mov_b64 s[10:11], s[6:7]
87; GFX12-SDAG-NEXT:    s_mov_b64 s[6:7], s[2:3]
88; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
89; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0xc0002
90; GFX12-SDAG-NEXT:    v_mov_b32_e32 v31, v0
91; GFX12-SDAG-NEXT:    s_barrier_init m0
92; GFX12-SDAG-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 48
93; GFX12-SDAG-NEXT:    s_mov_b64 s[4:5], s[0:1]
94; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
95; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
96; GFX12-SDAG-NEXT:    s_lshr_b32 s2, s2, 4
97; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
98; GFX12-SDAG-NEXT:    s_and_b32 s2, s2, 63
99; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
100; GFX12-SDAG-NEXT:    s_or_b32 s3, 0x90000, s2
101; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
102; GFX12-SDAG-NEXT:    s_mov_b32 m0, s3
103; GFX12-SDAG-NEXT:    s_barrier_init m0
104; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0xc0002
105; GFX12-SDAG-NEXT:    s_barrier_signal m0
106; GFX12-SDAG-NEXT:    s_mov_b32 m0, s3
107; GFX12-SDAG-NEXT:    s_barrier_signal m0
108; GFX12-SDAG-NEXT:    s_mov_b32 m0, s2
109; GFX12-SDAG-NEXT:    s_barrier_signal -1
110; GFX12-SDAG-NEXT:    s_barrier_signal_isfirst -1
111; GFX12-SDAG-NEXT:    s_barrier_join m0
112; GFX12-SDAG-NEXT:    s_mov_b32 m0, 2
113; GFX12-SDAG-NEXT:    s_barrier_wait 1
114; GFX12-SDAG-NEXT:    s_barrier_leave
115; GFX12-SDAG-NEXT:    s_get_barrier_state s3, m0
116; GFX12-SDAG-NEXT:    s_mov_b32 m0, s2
117; GFX12-SDAG-NEXT:    s_get_barrier_state s2, m0
118; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
119; GFX12-SDAG-NEXT:    s_getpc_b64 s[2:3]
120; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
121; GFX12-SDAG-NEXT:    s_sext_i32_i16 s3, s3
122; GFX12-SDAG-NEXT:    s_add_co_u32 s2, s2, func1@gotpcrel32@lo+12
123; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
124; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s3, s3, func1@gotpcrel32@hi+24
125; GFX12-SDAG-NEXT:    s_barrier_signal -1
126; GFX12-SDAG-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
127; GFX12-SDAG-NEXT:    s_barrier_wait -1
128; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
129; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
130; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[2:3]
131; GFX12-SDAG-NEXT:    s_getpc_b64 s[2:3]
132; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
133; GFX12-SDAG-NEXT:    s_sext_i32_i16 s3, s3
134; GFX12-SDAG-NEXT:    s_add_co_u32 s2, s2, func2@gotpcrel32@lo+12
135; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
136; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s3, s3, func2@gotpcrel32@hi+24
137; GFX12-SDAG-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
138; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
139; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
140; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[2:3]
141; GFX12-SDAG-NEXT:    s_get_barrier_state s0, -1
142; GFX12-SDAG-NEXT:    s_endpgm
143;
144; GFX12-GISEL-LABEL: kernel1:
145; GFX12-GISEL:       ; %bb.0:
146; GFX12-GISEL-NEXT:    s_mov_b64 s[12:13], s[4:5]
147; GFX12-GISEL-NEXT:    s_mov_b64 s[4:5], s[0:1]
148; GFX12-GISEL-NEXT:    s_load_b32 s0, s[12:13], 0x2c
149; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0xc0002
150; GFX12-GISEL-NEXT:    v_mov_b32_e32 v31, v0
151; GFX12-GISEL-NEXT:    s_barrier_init m0
152; GFX12-GISEL-NEXT:    s_mov_b64 s[10:11], s[6:7]
153; GFX12-GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
154; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
155; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
156; GFX12-GISEL-NEXT:    s_lshr_b32 s0, s0, 4
157; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
158; GFX12-GISEL-NEXT:    s_and_b32 s0, s0, 63
159; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
160; GFX12-GISEL-NEXT:    s_or_b32 s1, s0, 0x90000
161; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
162; GFX12-GISEL-NEXT:    s_mov_b32 m0, s1
163; GFX12-GISEL-NEXT:    s_barrier_init m0
164; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0xc0002
165; GFX12-GISEL-NEXT:    s_barrier_signal m0
166; GFX12-GISEL-NEXT:    s_mov_b32 m0, s1
167; GFX12-GISEL-NEXT:    s_barrier_signal m0
168; GFX12-GISEL-NEXT:    s_barrier_signal -1
169; GFX12-GISEL-NEXT:    s_barrier_signal_isfirst -1
170; GFX12-GISEL-NEXT:    s_mov_b32 m0, s0
171; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s12, 48
172; GFX12-GISEL-NEXT:    s_barrier_join m0
173; GFX12-GISEL-NEXT:    s_barrier_wait 1
174; GFX12-GISEL-NEXT:    s_barrier_leave
175; GFX12-GISEL-NEXT:    s_get_barrier_state s0, 2
176; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
177; GFX12-GISEL-NEXT:    s_get_barrier_state s0, m0
178; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s13, 0
179; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
180; GFX12-GISEL-NEXT:    s_getpc_b64 s[0:1]
181; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
182; GFX12-GISEL-NEXT:    s_sext_i32_i16 s1, s1
183; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, func1@gotpcrel32@lo+12
184; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
185; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, func1@gotpcrel32@hi+24
186; GFX12-GISEL-NEXT:    s_barrier_signal -1
187; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
188; GFX12-GISEL-NEXT:    s_barrier_wait -1
189; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
190; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
191; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
192; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s12, 48
193; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s13, 0
194; GFX12-GISEL-NEXT:    s_getpc_b64 s[0:1]
195; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
196; GFX12-GISEL-NEXT:    s_sext_i32_i16 s1, s1
197; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, func2@gotpcrel32@lo+12
198; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
199; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, func2@gotpcrel32@hi+24
200; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
201; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
202; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
203; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
204; GFX12-GISEL-NEXT:    s_get_barrier_state s0, -1
205; GFX12-GISEL-NEXT:    s_endpgm
    ; Initialize, then signal, both the static barrier @bar (i32 12) and the
    ; barrier reached through the %in pointer (i32 9).
206    call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) @bar, i32 12)
207    call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %in, i32 9)
208    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 12)
209    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %in, i32 9)
    ; Signal with the immediate -1, first through the plain intrinsic and then
    ; through the isfirst variant; the i1 result is unused.
210    call void @llvm.amdgcn.s.barrier.signal(i32 -1)
211    %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
    ; Join the %in barrier, wait, then leave. The expected s_barrier_leave
    ; above is printed without an operand.
212    call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
213    call void @llvm.amdgcn.s.barrier.wait(i16 1)
214    call void @llvm.amdgcn.s.barrier.leave(i16 1)
    ; Query the state of both named barriers; neither result is used.
215    %state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
216    %state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
    ; This call appears in the expected output as s_barrier_signal -1 followed
    ; by s_barrier_wait -1.
217    call void @llvm.amdgcn.s.barrier()
    ; Call both helpers; func1 uses @bar3 and func2 uses @bar2.
218    call void @func1()
219    call void @func2()
    ; Final state query with the immediate -1; the result is unused.
220    %state3 = call i32 @llvm.amdgcn.s.get.barrier.state(i32 -1)
221    ret void
222}
223
; kernel2: kernel that signals named barrier @bar with an i32 operand of 7,
; joins it, waits with an i16 operand of 1, and then calls func2 (which uses
; @bar2). Neither the %out nor the %in argument is used in the body.
; In the expected output below, m0 is loaded with 0x70002 before
; s_barrier_signal and the join uses the value 2 (through m0 for SelectionDAG,
; as an immediate for GlobalISel).
; NOTE(review): 0x70002 equals (7 << 16) | 2, presumably the i32 operand placed
; above a barrier id of 2 - confirm against the ISA documentation.
224define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
225; GFX12-SDAG-LABEL: kernel2:
226; GFX12-SDAG:       ; %bb.0:
227; GFX12-SDAG-NEXT:    s_mov_b64 s[10:11], s[6:7]
228; GFX12-SDAG-NEXT:    s_getpc_b64 s[6:7]
229; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
230; GFX12-SDAG-NEXT:    s_sext_i32_i16 s7, s7
231; GFX12-SDAG-NEXT:    s_add_co_u32 s6, s6, func2@gotpcrel32@lo+12
232; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
233; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s7, s7, func2@gotpcrel32@hi+24
234; GFX12-SDAG-NEXT:    v_mov_b32_e32 v31, v0
235; GFX12-SDAG-NEXT:    s_load_b64 s[12:13], s[6:7], 0x0
236; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70002
237; GFX12-SDAG-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 48
238; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
239; GFX12-SDAG-NEXT:    s_barrier_signal m0
240; GFX12-SDAG-NEXT:    s_mov_b32 m0, 2
241; GFX12-SDAG-NEXT:    s_mov_b64 s[4:5], s[0:1]
242; GFX12-SDAG-NEXT:    s_mov_b64 s[6:7], s[2:3]
243; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
244; GFX12-SDAG-NEXT:    s_barrier_join m0
245; GFX12-SDAG-NEXT:    s_barrier_wait 1
246; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
247; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[12:13]
248; GFX12-SDAG-NEXT:    s_endpgm
249;
250; GFX12-GISEL-LABEL: kernel2:
251; GFX12-GISEL:       ; %bb.0:
252; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s4, 48
253; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s5, 0
254; GFX12-GISEL-NEXT:    s_getpc_b64 s[4:5]
255; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
256; GFX12-GISEL-NEXT:    s_sext_i32_i16 s5, s5
257; GFX12-GISEL-NEXT:    s_add_co_u32 s4, s4, func2@gotpcrel32@lo+12
258; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
259; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s5, s5, func2@gotpcrel32@hi+24
260; GFX12-GISEL-NEXT:    v_mov_b32_e32 v31, v0
261; GFX12-GISEL-NEXT:    s_load_b64 s[12:13], s[4:5], 0x0
262; GFX12-GISEL-NEXT:    s_mov_b64 s[10:11], s[6:7]
263; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70002
264; GFX12-GISEL-NEXT:    s_mov_b64 s[4:5], s[0:1]
265; GFX12-GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
266; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
267; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
268; GFX12-GISEL-NEXT:    s_barrier_signal m0
269; GFX12-GISEL-NEXT:    s_barrier_join 2
270; GFX12-GISEL-NEXT:    s_barrier_wait 1
271; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
272; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[12:13]
273; GFX12-GISEL-NEXT:    s_endpgm
    ; Signal, join and wait on @bar directly in the kernel ...
274    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 7)
275    call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar)
276    call void @llvm.amdgcn.s.barrier.wait(i16 1)
277
    ; ... then call a function that uses a different named barrier (@bar2).
278    call void @func2()
279    ret void
280}
281
; Declarations of the barrier intrinsics called by the functions above. Every
; declaration carries attribute group #1.
282declare void @llvm.amdgcn.s.barrier() #1
283declare void @llvm.amdgcn.s.barrier.wait(i16) #1
284declare void @llvm.amdgcn.s.barrier.signal(i32) #1
285declare void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3), i32) #1
286declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
287declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
288declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
289declare void @llvm.amdgcn.s.barrier.leave(i16) #1
290declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
291declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1
292
; Attribute groups: #0 is attached to the two amdgpu_kernel definitions and #1
; to the intrinsic declarations.
; NOTE(review): #2 is not referenced anywhere in the visible part of this file;
; it may be a leftover - confirm before removing.
293attributes #0 = { nounwind }
294attributes #1 = { convergent nounwind }
295attributes #2 = { nounwind readnone }
296