xref: /llvm-project/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
3; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s
4
; Kernel whose single-block loop (%ENDIF) branches on %1, a value computed in
; %main_body before the loop is entered, so the exit condition is the same on
; every iteration. Both llc invocations are expected to lower the loop to the
; s_and_b64 / s_or_b64 / s_andn2_b64 exec / s_cbranch_execnz sequence and to
; restore exec with s_or_b64 in %ENDLOOP before the store to %out.
5define amdgpu_kernel void @break_inserted_outside_of_loop(ptr addrspace(1) %out, i32 %a) {
6; SI-LABEL: break_inserted_outside_of_loop:
7; SI:       ; %bb.0: ; %main_body
8; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
9; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
10; SI-NEXT:    s_waitcnt lgkmcnt(0)
11; SI-NEXT:    v_and_b32_e32 v0, s0, v0
12; SI-NEXT:    v_and_b32_e32 v0, 1, v0
13; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
14; SI-NEXT:    s_mov_b64 s[0:1], 0
15; SI-NEXT:  .LBB0_1: ; %ENDIF
16; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
17; SI-NEXT:    s_and_b64 s[2:3], exec, vcc
18; SI-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
19; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
20; SI-NEXT:    s_cbranch_execnz .LBB0_1
21; SI-NEXT:  ; %bb.2: ; %ENDLOOP
22; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
23; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
24; SI-NEXT:    s_mov_b32 s3, 0xf000
25; SI-NEXT:    s_mov_b32 s2, -1
26; SI-NEXT:    v_mov_b32_e32 v0, 0
27; SI-NEXT:    s_waitcnt lgkmcnt(0)
28; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
29; SI-NEXT:    s_endpgm
30;
31; FLAT-LABEL: break_inserted_outside_of_loop:
32; FLAT:       ; %bb.0: ; %main_body
33; FLAT-NEXT:    s_load_dword s0, s[4:5], 0x2c
34; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
35; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
36; FLAT-NEXT:    v_and_b32_e32 v0, s0, v0
37; FLAT-NEXT:    v_and_b32_e32 v0, 1, v0
38; FLAT-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
39; FLAT-NEXT:    s_mov_b64 s[0:1], 0
40; FLAT-NEXT:  .LBB0_1: ; %ENDIF
41; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
42; FLAT-NEXT:    s_and_b64 s[2:3], exec, vcc
43; FLAT-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
44; FLAT-NEXT:    s_andn2_b64 exec, exec, s[0:1]
45; FLAT-NEXT:    s_cbranch_execnz .LBB0_1
46; FLAT-NEXT:  ; %bb.2: ; %ENDLOOP
47; FLAT-NEXT:    s_or_b64 exec, exec, s[0:1]
48; FLAT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
49; FLAT-NEXT:    s_mov_b32 s3, 0xf000
50; FLAT-NEXT:    s_mov_b32 s2, -1
51; FLAT-NEXT:    v_mov_b32_e32 v0, 0
52; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
53; FLAT-NEXT:    buffer_store_dword v0, off, s[0:3], 0
54; FLAT-NEXT:    s_endpgm
55main_body:
  ; The loop condition %1 is bit 0 of (%a & %tid) and is fully computed here,
  ; outside the loop. NOTE(review): mbcnt.lo presumably supplies a per-lane
  ; value so that %1 can differ between lanes; the expected code does evaluate
  ; it with vector instructions (v_and_b32 / v_cmp_eq_u32 into vcc).
56  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
57  %0 = and i32 %a, %tid
58  %1 = trunc i32 %0 to i1
59  br label %ENDIF
60
  ; Loop exit: the only block with an observable effect (stores 0 to %out).
61ENDLOOP:
62  store i32 0, ptr addrspace(1) %out
63  ret void
64
  ; Loop body and header in one block: branches back to itself until %1 is
  ; true. Nothing inside the loop modifies %1.
65ENDIF:
66  br i1 %1, label %ENDLOOP, label %ENDIF
67}
68
; Kernel whose loop condition %2 is an i1 phi defined in %endif, outside the
; %loop block that branches on it: false when arriving from %if, (%b == 0)
; when arriving from %else. The expected code keeps that condition in s[2:3]
; (initialised to 0, overwritten in %else via s_cselect_b64 and masked with
; exec) and consumes it in the loop's s_and_b64 / s_or_b64 / s_andn2_b64
; exec-mask sequence.
69define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
70; SI-LABEL: phi_cond_outside_loop:
71; SI:       ; %bb.0: ; %entry
72; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
73; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
74; SI-NEXT:    s_mov_b64 s[0:1], 0
75; SI-NEXT:    s_mov_b64 s[2:3], 0
76; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
77; SI-NEXT:    s_cbranch_execz .LBB1_2
78; SI-NEXT:  ; %bb.1: ; %else
79; SI-NEXT:    s_load_dword s2, s[4:5], 0x9
80; SI-NEXT:    s_waitcnt lgkmcnt(0)
81; SI-NEXT:    s_cmp_eq_u32 s2, 0
82; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
83; SI-NEXT:    s_and_b64 s[2:3], s[2:3], exec
84; SI-NEXT:  .LBB1_2: ; %endif
85; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
86; SI-NEXT:  .LBB1_3: ; %loop
87; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
88; SI-NEXT:    s_and_b64 s[4:5], exec, s[2:3]
89; SI-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
90; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
91; SI-NEXT:    s_cbranch_execnz .LBB1_3
92; SI-NEXT:  ; %bb.4: ; %exit
93; SI-NEXT:    s_endpgm
94;
95; FLAT-LABEL: phi_cond_outside_loop:
96; FLAT:       ; %bb.0: ; %entry
97; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
98; FLAT-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
99; FLAT-NEXT:    s_mov_b64 s[0:1], 0
100; FLAT-NEXT:    s_mov_b64 s[2:3], 0
101; FLAT-NEXT:    s_and_saveexec_b64 s[6:7], vcc
102; FLAT-NEXT:    s_cbranch_execz .LBB1_2
103; FLAT-NEXT:  ; %bb.1: ; %else
104; FLAT-NEXT:    s_load_dword s2, s[4:5], 0x24
105; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
106; FLAT-NEXT:    s_cmp_eq_u32 s2, 0
107; FLAT-NEXT:    s_cselect_b64 s[2:3], -1, 0
108; FLAT-NEXT:    s_and_b64 s[2:3], s[2:3], exec
109; FLAT-NEXT:  .LBB1_2: ; %endif
110; FLAT-NEXT:    s_or_b64 exec, exec, s[6:7]
111; FLAT-NEXT:  .LBB1_3: ; %loop
112; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
113; FLAT-NEXT:    s_and_b64 s[4:5], exec, s[2:3]
114; FLAT-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
115; FLAT-NEXT:    s_andn2_b64 exec, exec, s[0:1]
116; FLAT-NEXT:    s_cbranch_execnz .LBB1_3
117; FLAT-NEXT:  ; %bb.4: ; %exit
118; FLAT-NEXT:    s_endpgm
119entry:
  ; Branch on the mbcnt.lo result: %if when it is 0, %else otherwise. In the
  ; expected code this is the v_cmp_ne_u32 + s_and_saveexec_b64 guard around
  ; the %else block.
120  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
121  %0 = icmp eq i32 %tid , 0
122  br i1 %0, label %if, label %else
123
  ; Empty arm; contributes the constant false (0) to the phi in %endif.
124if:
125  br label %endif
126
  ; Contributes (%b == 0) to the phi in %endif.
127else:
128  %1 = icmp eq i32 %b, 0
129  br label %endif
130
  ; Join point: %2 is the loop condition, defined here before the loop.
131endif:
132  %2 = phi i1 [0, %if], [%1, %else]
133  br label %loop
134
  ; Single-block loop that repeats until %2 is true; %2 is not changed inside
  ; the loop.
135loop:
136  br i1 %2, label %exit, label %loop
137
138exit:
139  ret void
140}
141
; Kernel whose switch on %x can only reach %sw.bb or %sw.default, and both of
; those blocks end in unreachable. %sw.epilog is not the target of any branch
; in this function, and the %g and %l arguments are never used.
; The assertions below match only the function label and the entry-block
; comment; no instruction sequence is checked for this function.
; NOTE(review): presumably the point is just that llc gets through this
; control flow (with -verify-machineinstrs) without crashing -- confirm.
142define amdgpu_kernel void @switch_unreachable(ptr addrspace(1) %g, ptr addrspace(3) %l, i32 %x) nounwind {
143; SI-LABEL: switch_unreachable:
144; SI:       ; %bb.0: ; %centry
145;
146; FLAT-LABEL: switch_unreachable:
147; FLAT:       ; %bb.0: ; %centry
148centry:
  ; Cases 0 and 60 share %sw.bb; every other value goes to %sw.default.
149  switch i32 %x, label %sw.default [
150    i32 0, label %sw.bb
151    i32 60, label %sw.bb
152  ]
153
154sw.bb:
155  unreachable
156
157sw.default:
158  unreachable
159
  ; No predecessors: nothing in this function branches here.
160sw.epilog:
161  ret void
162}
163
164declare float @llvm.fabs.f32(float) nounwind readnone
165
; Kernel with nested loops (%while.cond.outer / %while.cond / %for.cond) that
; have several exits to %return, plus %self.loop, a block that branches only
; to itself. Only %c0, %c1, %c3 and %arg are referenced; %c2, %x and %y are
; unused. The load and the volatile store both use an undef address.
;
; Expected code for both llc invocations is much simpler than the IR:
;   - compare a loaded argument against 4 (s_cmp_lt_i32, matching %cmp1) and
;     go straight to the return block when it is less than 4;
;   - otherwise compare a second loaded argument against 0x3e8 = 1000
;     (s_cmpk_lt_i32, matching %cmp3) and return when it is not less;
;   - otherwise fall into the %self.loop block, which branches back to itself.
; No code is expected for %if.end / %if.else (no load, fabs, or volatile
; store appears in the checked output).
; NOTE(review): the name suggests this was reduced from an input that once
; triggered an assertion; the file does not say which one -- confirm before
; simplifying the IR.
166define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
167; SI-LABEL: loop_land_info_assert:
168; SI:       ; %bb.0: ; %entry
169; SI-NEXT:    s_load_dword s0, s[4:5], 0xa
170; SI-NEXT:    s_waitcnt lgkmcnt(0)
171; SI-NEXT:    s_cmp_lt_i32 s0, 4
172; SI-NEXT:    s_cbranch_scc1 .LBB3_4
173; SI-NEXT:  ; %bb.1: ; %for.cond.preheader
174; SI-NEXT:    s_load_dword s0, s[4:5], 0xc
175; SI-NEXT:    s_waitcnt lgkmcnt(0)
176; SI-NEXT:    s_cmpk_lt_i32 s0, 0x3e8
177; SI-NEXT:    s_cbranch_scc0 .LBB3_4
178; SI-NEXT:  ; %bb.2: ; %for.body
179; SI-NEXT:    s_and_b64 vcc, exec, 0
180; SI-NEXT:  .LBB3_3: ; %self.loop
181; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
182; SI-NEXT:    s_mov_b64 vcc, vcc
183; SI-NEXT:    s_cbranch_vccz .LBB3_3
184; SI-NEXT:  .LBB3_4: ; %DummyReturnBlock
185; SI-NEXT:    s_endpgm
186;
187; FLAT-LABEL: loop_land_info_assert:
188; FLAT:       ; %bb.0: ; %entry
189; FLAT-NEXT:    s_load_dword s0, s[4:5], 0x28
190; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
191; FLAT-NEXT:    s_cmp_lt_i32 s0, 4
192; FLAT-NEXT:    s_cbranch_scc1 .LBB3_4
193; FLAT-NEXT:  ; %bb.1: ; %for.cond.preheader
194; FLAT-NEXT:    s_load_dword s0, s[4:5], 0x30
195; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
196; FLAT-NEXT:    s_cmpk_lt_i32 s0, 0x3e8
197; FLAT-NEXT:    s_cbranch_scc0 .LBB3_4
198; FLAT-NEXT:  ; %bb.2: ; %for.body
199; FLAT-NEXT:    s_and_b64 vcc, exec, 0
200; FLAT-NEXT:  .LBB3_3: ; %self.loop
201; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
202; FLAT-NEXT:    s_mov_b64 vcc, vcc
203; FLAT-NEXT:    s_cbranch_vccz .LBB3_3
204; FLAT-NEXT:  .LBB3_4: ; %DummyReturnBlock
205; FLAT-NEXT:    s_endpgm
206entry:
207  %cmp = icmp sgt i32 %c0, 0
208  br label %while.cond.outer
209
  ; Outer loop header; re-entered from %if.end when %cmp2 is false.
210while.cond.outer:
211  %tmp = load float, ptr addrspace(1) undef
212  br label %while.cond
213
  ; Inner loop header; re-entered from %if.else.
214while.cond:
215  %cmp1 = icmp slt i32 %c1, 4
216  br i1 %cmp1, label %convex.exit, label %for.cond
217
  ; Only reached when %cmp1 is true, so %or is always true on this path and
  ; the branch always goes to %return.
218convex.exit:
219  %or = or i1 %cmp, %cmp1
220  br i1 %or, label %return, label %if.end
221
222if.end:
223  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
224  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
225  br i1 %cmp2, label %if.else, label %while.cond.outer
226
227if.else:
228  store volatile i32 3, ptr addrspace(1) undef, align 4
229  br label %while.cond
230
231for.cond:
232  %cmp3 = icmp slt i32 %c3, 1000
233  br i1 %cmp3, label %for.body, label %return
234
  ; Only reached when %cmp3 is true, so this branch always goes to %self.loop.
235for.body:
236  br i1 %cmp3, label %self.loop, label %if.end.2
237
238if.end.2:
239  %or.cond2 = or i1 %cmp3, %arg
240  br i1 %or.cond2, label %return, label %for.cond
241
  ; Loop with no exit edge: the block's only successor is itself.
242self.loop:
243 br label %self.loop
244
245return:
246  ret void
247}
248
249declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
250
251attributes #0 = { nounwind readnone }
252