xref: /llvm-project/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll (revision 40fa7f5e8b315159d45aa280c771af5998bdc75e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
6
; Kill with a constant-true condition. The expected output for every run
; configuration is a lone s_endpgm: no exec update and no early-exit block.
7define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
8; GCN-LABEL: test_kill_depth_0_imm_pos:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_endpgm
11  call void @llvm.amdgcn.kill(i1 true)
12  ret void
13}
14
; Kill with a constant-false condition. The expected output and-nots exec with
; itself, then branches on scc0 to an early-exit block that zeroes exec, emits
; a "done" export (null target, or mrt0 on the gfx1100 run) and ends the
; program. The wave32 run uses the 32-bit exec_lo forms of the same sequence.
15define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
16; WAVE64-LABEL: test_kill_depth_0_imm_neg:
17; WAVE64:       ; %bb.0:
18; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
19; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
20; WAVE64-NEXT:    s_endpgm
21; WAVE64-NEXT:  .LBB1_1:
22; WAVE64-NEXT:    s_mov_b64 exec, 0
23; WAVE64-NEXT:    exp null off, off, off, off done vm
24; WAVE64-NEXT:    s_endpgm
25;
26; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
27; GFX10-WAVE32:       ; %bb.0:
28; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
29; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
30; GFX10-WAVE32-NEXT:    s_endpgm
31; GFX10-WAVE32-NEXT:  .LBB1_1:
32; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
33; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
34; GFX10-WAVE32-NEXT:    s_endpgm
35;
36; GFX11-LABEL: test_kill_depth_0_imm_neg:
37; GFX11:       ; %bb.0:
38; GFX11-NEXT:    s_and_not1_b64 exec, exec, exec
39; GFX11-NEXT:    s_cbranch_scc0 .LBB1_1
40; GFX11-NEXT:    s_endpgm
41; GFX11-NEXT:  .LBB1_1:
42; GFX11-NEXT:    s_mov_b64 exec, 0
43; GFX11-NEXT:    exp mrt0 off, off, off, off done
44; GFX11-NEXT:    s_endpgm
45  call void @llvm.amdgcn.kill(i1 false)
46  ret void
47}
48
49; FIXME: Ideally only one early-exit would be emitted
; Two back-to-back kills with a constant-false condition. The expected output
; keeps a copy of exec in SGPRs (s[0:1], or s0 on the wave32 run) and emits one
; and-not plus s_cbranch_scc0 per kill; both branches target the same
; early-exit block (.LBB2_2).
50define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
51; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
52; WAVE64:       ; %bb.0:
53; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
54; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
55; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
56; WAVE64-NEXT:  ; %bb.1:
57; WAVE64-NEXT:    s_mov_b64 exec, 0
58; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
59; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
60; WAVE64-NEXT:    s_endpgm
61; WAVE64-NEXT:  .LBB2_2:
62; WAVE64-NEXT:    s_mov_b64 exec, 0
63; WAVE64-NEXT:    exp null off, off, off, off done vm
64; WAVE64-NEXT:    s_endpgm
65;
66; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
67; GFX10-WAVE32:       ; %bb.0:
68; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
69; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
70; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
71; GFX10-WAVE32-NEXT:  ; %bb.1:
72; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
73; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
74; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
75; GFX10-WAVE32-NEXT:    s_endpgm
76; GFX10-WAVE32-NEXT:  .LBB2_2:
77; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
78; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
79; GFX10-WAVE32-NEXT:    s_endpgm
80;
81; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
82; GFX11:       ; %bb.0:
83; GFX11-NEXT:    s_mov_b64 s[0:1], exec
84; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
85; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
86; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
87; GFX11-NEXT:  ; %bb.1:
88; GFX11-NEXT:    s_mov_b64 exec, 0
89; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
90; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
91; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
92; GFX11-NEXT:    s_endpgm
93; GFX11-NEXT:  .LBB2_2:
94; GFX11-NEXT:    s_mov_b64 exec, 0
95; GFX11-NEXT:    exp mrt0 off, off, off, off done
96; GFX11-NEXT:    s_endpgm
97  call void @llvm.amdgcn.kill(i1 false)
98  call void @llvm.amdgcn.kill(i1 false)
99  ret void
100}
101
; Kill on a per-lane condition: %x < 0.0 (ordered compare). The expected
; output computes the inverse compare (v_cmp_ngt 0, v0) into vcc and and-nots
; that mask out of exec, followed by the usual scc0 branch to the early-exit
; block.
102define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
103; WAVE64-LABEL: test_kill_depth_var:
104; WAVE64:       ; %bb.0:
105; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
106; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
107; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
108; WAVE64-NEXT:    s_endpgm
109; WAVE64-NEXT:  .LBB3_1:
110; WAVE64-NEXT:    s_mov_b64 exec, 0
111; WAVE64-NEXT:    exp null off, off, off, off done vm
112; WAVE64-NEXT:    s_endpgm
113;
114; GFX10-WAVE32-LABEL: test_kill_depth_var:
115; GFX10-WAVE32:       ; %bb.0:
116; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
117; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
118; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
119; GFX10-WAVE32-NEXT:    s_endpgm
120; GFX10-WAVE32-NEXT:  .LBB3_1:
121; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
122; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
123; GFX10-WAVE32-NEXT:    s_endpgm
124;
125; GFX11-LABEL: test_kill_depth_var:
126; GFX11:       ; %bb.0:
127; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
128; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
129; GFX11-NEXT:    s_cbranch_scc0 .LBB3_1
130; GFX11-NEXT:    s_endpgm
131; GFX11-NEXT:  .LBB3_1:
132; GFX11-NEXT:    s_mov_b64 exec, 0
133; GFX11-NEXT:    exp mrt0 off, off, off, off done
134; GFX11-NEXT:    s_endpgm
135  %cmp = fcmp olt float %x, 0.0
136  call void @llvm.amdgcn.kill(i1 %cmp)
137  ret void
138}
139
140; FIXME: Ideally only one early-exit would be emitted
; Two kills that consume the very same i1 value (%cmp = %x < 0.0). The expected
; output still contains two compares of v0 and two scc0 branches to the shared
; early-exit block (.LBB4_2); a copy of exec is tracked in SGPRs across both
; kills.
141define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
142; SI-LABEL: test_kill_depth_var_x2_same:
143; SI:       ; %bb.0:
144; SI-NEXT:    s_mov_b64 s[0:1], exec
145; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
146; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
147; SI-NEXT:    s_cbranch_scc0 .LBB4_2
148; SI-NEXT:  ; %bb.1:
149; SI-NEXT:    s_andn2_b64 exec, exec, vcc
150; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
151; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
152; SI-NEXT:    s_cbranch_scc0 .LBB4_2
153; SI-NEXT:    s_endpgm
154; SI-NEXT:  .LBB4_2:
155; SI-NEXT:    s_mov_b64 exec, 0
156; SI-NEXT:    exp null off, off, off, off done vm
157; SI-NEXT:    s_endpgm
158;
159; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
160; GFX10-WAVE64:       ; %bb.0:
161; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
162; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
163; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
164; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
165; GFX10-WAVE64-NEXT:  ; %bb.1:
166; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
167; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
168; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
169; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
170; GFX10-WAVE64-NEXT:    s_endpgm
171; GFX10-WAVE64-NEXT:  .LBB4_2:
172; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
173; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
174; GFX10-WAVE64-NEXT:    s_endpgm
175;
176; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
177; GFX10-WAVE32:       ; %bb.0:
178; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
179; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
180; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
181; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
182; GFX10-WAVE32-NEXT:  ; %bb.1:
183; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
184; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
185; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
186; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
187; GFX10-WAVE32-NEXT:    s_endpgm
188; GFX10-WAVE32-NEXT:  .LBB4_2:
189; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
190; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
191; GFX10-WAVE32-NEXT:    s_endpgm
192;
193; GFX11-LABEL: test_kill_depth_var_x2_same:
194; GFX11:       ; %bb.0:
195; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
196; GFX11-NEXT:    s_mov_b64 s[0:1], exec
197; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
198; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
199; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
200; GFX11-NEXT:  ; %bb.1:
201; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
202; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
203; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
204; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
205; GFX11-NEXT:    s_endpgm
206; GFX11-NEXT:  .LBB4_2:
207; GFX11-NEXT:    s_mov_b64 exec, 0
208; GFX11-NEXT:    exp mrt0 off, off, off, off done
209; GFX11-NEXT:    s_endpgm
210  %cmp = fcmp olt float %x, 0.0
211  call void @llvm.amdgcn.kill(i1 %cmp)
212  call void @llvm.amdgcn.kill(i1 %cmp)
213  ret void
214}
215
216; FIXME: Ideally only one early-exit would be emitted
; Two kills on independent per-lane conditions: %x < 0.0 (v0) and then
; %y < 0.0 (v1). The expected output has one compare, and-not and scc0 branch
; per kill, both branching to the shared early-exit block (.LBB5_2).
217define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
218; SI-LABEL: test_kill_depth_var_x2:
219; SI:       ; %bb.0:
220; SI-NEXT:    s_mov_b64 s[0:1], exec
221; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
222; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
223; SI-NEXT:    s_cbranch_scc0 .LBB5_2
224; SI-NEXT:  ; %bb.1:
225; SI-NEXT:    s_andn2_b64 exec, exec, vcc
226; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
227; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
228; SI-NEXT:    s_cbranch_scc0 .LBB5_2
229; SI-NEXT:    s_endpgm
230; SI-NEXT:  .LBB5_2:
231; SI-NEXT:    s_mov_b64 exec, 0
232; SI-NEXT:    exp null off, off, off, off done vm
233; SI-NEXT:    s_endpgm
234;
235; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
236; GFX10-WAVE64:       ; %bb.0:
237; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
238; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
239; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
240; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
241; GFX10-WAVE64-NEXT:  ; %bb.1:
242; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
243; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
244; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
245; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
246; GFX10-WAVE64-NEXT:    s_endpgm
247; GFX10-WAVE64-NEXT:  .LBB5_2:
248; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
249; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
250; GFX10-WAVE64-NEXT:    s_endpgm
251;
252; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
253; GFX10-WAVE32:       ; %bb.0:
254; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
255; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
256; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
257; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
258; GFX10-WAVE32-NEXT:  ; %bb.1:
259; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
260; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
261; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
262; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
263; GFX10-WAVE32-NEXT:    s_endpgm
264; GFX10-WAVE32-NEXT:  .LBB5_2:
265; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
266; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
267; GFX10-WAVE32-NEXT:    s_endpgm
268;
269; GFX11-LABEL: test_kill_depth_var_x2:
270; GFX11:       ; %bb.0:
271; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
272; GFX11-NEXT:    s_mov_b64 s[0:1], exec
273; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
274; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
275; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
276; GFX11-NEXT:  ; %bb.1:
277; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
278; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
279; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
280; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
281; GFX11-NEXT:    s_endpgm
282; GFX11-NEXT:  .LBB5_2:
283; GFX11-NEXT:    s_mov_b64 exec, 0
284; GFX11-NEXT:    exp mrt0 off, off, off, off done
285; GFX11-NEXT:    s_endpgm
286  %cmp.x = fcmp olt float %x, 0.0
287  call void @llvm.amdgcn.kill(i1 %cmp.x)
288  %cmp.y = fcmp olt float %y, 0.0
289  call void @llvm.amdgcn.kill(i1 %cmp.y)
290  ret void
291}
292
; Two kills separated by a side-effecting inline asm statement. The asm writes
; v7, and that value (not a function argument) feeds the second kill's
; compare. The expected output places the asm between the first kill's exec
; update and the second compare.
293define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
294; SI-LABEL: test_kill_depth_var_x2_instructions:
295; SI:       ; %bb.0:
296; SI-NEXT:    s_mov_b64 s[0:1], exec
297; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
298; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
299; SI-NEXT:    s_cbranch_scc0 .LBB6_2
300; SI-NEXT:  ; %bb.1:
301; SI-NEXT:    s_andn2_b64 exec, exec, vcc
302; SI-NEXT:    ;;#ASMSTART
303; SI-NEXT:    v_mov_b32_e64 v7, -1
304; SI-NEXT:    ;;#ASMEND
305; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
306; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
307; SI-NEXT:    s_cbranch_scc0 .LBB6_2
308; SI-NEXT:    s_endpgm
309; SI-NEXT:  .LBB6_2:
310; SI-NEXT:    s_mov_b64 exec, 0
311; SI-NEXT:    exp null off, off, off, off done vm
312; SI-NEXT:    s_endpgm
313;
314; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
315; GFX10-WAVE64:       ; %bb.0:
316; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
317; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
318; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
319; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
320; GFX10-WAVE64-NEXT:  ; %bb.1:
321; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
322; GFX10-WAVE64-NEXT:    ;;#ASMSTART
323; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
324; GFX10-WAVE64-NEXT:    ;;#ASMEND
325; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
326; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
327; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
328; GFX10-WAVE64-NEXT:    s_endpgm
329; GFX10-WAVE64-NEXT:  .LBB6_2:
330; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
331; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
332; GFX10-WAVE64-NEXT:    s_endpgm
333;
334; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
335; GFX10-WAVE32:       ; %bb.0:
336; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
337; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
338; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
339; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
340; GFX10-WAVE32-NEXT:  ; %bb.1:
341; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
342; GFX10-WAVE32-NEXT:    ;;#ASMSTART
343; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
344; GFX10-WAVE32-NEXT:    ;;#ASMEND
345; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
346; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
347; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
348; GFX10-WAVE32-NEXT:    s_endpgm
349; GFX10-WAVE32-NEXT:  .LBB6_2:
350; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
351; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
352; GFX10-WAVE32-NEXT:    s_endpgm
353;
354; GFX11-LABEL: test_kill_depth_var_x2_instructions:
355; GFX11:       ; %bb.0:
356; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
357; GFX11-NEXT:    s_mov_b64 s[0:1], exec
358; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
359; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
360; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
361; GFX11-NEXT:  ; %bb.1:
362; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
363; GFX11-NEXT:    ;;#ASMSTART
364; GFX11-NEXT:    v_mov_b32_e64 v7, -1
365; GFX11-NEXT:    ;;#ASMEND
366; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
367; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
368; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
369; GFX11-NEXT:    s_endpgm
370; GFX11-NEXT:  .LBB6_2:
371; GFX11-NEXT:    s_mov_b64 exec, 0
372; GFX11-NEXT:    exp mrt0 off, off, off, off done
373; GFX11-NEXT:    s_endpgm
374  %cmp.x = fcmp olt float %x, 0.0
375  call void @llvm.amdgcn.kill(i1 %cmp.x)
  ; The "={v7}" constraint pins the asm result to v7, so the second compare
  ; reads that register directly.
376  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
377  %cmp.y = fcmp olt float %y, 0.0
378  call void @llvm.amdgcn.kill(i1 %cmp.y)
379  ret void
380}
381
382; FIXME: why does the skip depend on the asm length in the same block?
; Kill inside a conditionally executed block. %arg is an inreg (SGPR) argument;
; block %bb runs only when %arg == 0 and contains an inline asm statement
; (one v_mov plus ten v_nop) whose v7 result drives the kill. Both paths
; return 1.0. The expected output branches from %bb to a separate early-exit
; block (.LBB7_4) when the kill leaves no live lanes.
383define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
384; SI-LABEL: test_kill_control_flow:
385; SI:       ; %bb.0: ; %entry
386; SI-NEXT:    s_cmp_lg_u32 s0, 0
387; SI-NEXT:    s_cbranch_scc0 .LBB7_2
388; SI-NEXT:  ; %bb.1: ; %exit
389; SI-NEXT:    v_mov_b32_e32 v0, 1.0
390; SI-NEXT:    s_branch .LBB7_5
391; SI-NEXT:  .LBB7_2: ; %bb
392; SI-NEXT:    s_mov_b64 s[2:3], exec
393; SI-NEXT:    ;;#ASMSTART
394; SI-NEXT:    v_mov_b32_e64 v7, -1
395; SI-NEXT:    v_nop_e64
396; SI-NEXT:    v_nop_e64
397; SI-NEXT:    v_nop_e64
398; SI-NEXT:    v_nop_e64
399; SI-NEXT:    v_nop_e64
400; SI-NEXT:    v_nop_e64
401; SI-NEXT:    v_nop_e64
402; SI-NEXT:    v_nop_e64
403; SI-NEXT:    v_nop_e64
404; SI-NEXT:    v_nop_e64
405; SI-NEXT:    ;;#ASMEND
406; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
407; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
408; SI-NEXT:    s_cbranch_scc0 .LBB7_4
409; SI-NEXT:  ; %bb.3: ; %bb
410; SI-NEXT:    s_andn2_b64 exec, exec, vcc
411; SI-NEXT:    v_mov_b32_e32 v0, 1.0
412; SI-NEXT:    s_branch .LBB7_5
413; SI-NEXT:  .LBB7_4:
414; SI-NEXT:    s_mov_b64 exec, 0
415; SI-NEXT:    exp null off, off, off, off done vm
416; SI-NEXT:    s_endpgm
417; SI-NEXT:  .LBB7_5:
418;
419; GFX10-WAVE64-LABEL: test_kill_control_flow:
420; GFX10-WAVE64:       ; %bb.0: ; %entry
421; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
422; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_2
423; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
424; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
425; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
426; GFX10-WAVE64-NEXT:  .LBB7_2: ; %bb
427; GFX10-WAVE64-NEXT:    ;;#ASMSTART
428; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
429; GFX10-WAVE64-NEXT:    v_nop_e64
430; GFX10-WAVE64-NEXT:    v_nop_e64
431; GFX10-WAVE64-NEXT:    v_nop_e64
432; GFX10-WAVE64-NEXT:    v_nop_e64
433; GFX10-WAVE64-NEXT:    v_nop_e64
434; GFX10-WAVE64-NEXT:    v_nop_e64
435; GFX10-WAVE64-NEXT:    v_nop_e64
436; GFX10-WAVE64-NEXT:    v_nop_e64
437; GFX10-WAVE64-NEXT:    v_nop_e64
438; GFX10-WAVE64-NEXT:    v_nop_e64
439; GFX10-WAVE64-NEXT:    ;;#ASMEND
440; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
441; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
442; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
443; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_4
444; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
445; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
446; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
447; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
448; GFX10-WAVE64-NEXT:  .LBB7_4:
449; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
450; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
451; GFX10-WAVE64-NEXT:    s_endpgm
452; GFX10-WAVE64-NEXT:  .LBB7_5:
453;
454; GFX10-WAVE32-LABEL: test_kill_control_flow:
455; GFX10-WAVE32:       ; %bb.0: ; %entry
456; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
457; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_2
458; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
459; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
460; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
461; GFX10-WAVE32-NEXT:  .LBB7_2: ; %bb
462; GFX10-WAVE32-NEXT:    ;;#ASMSTART
463; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
464; GFX10-WAVE32-NEXT:    v_nop_e64
465; GFX10-WAVE32-NEXT:    v_nop_e64
466; GFX10-WAVE32-NEXT:    v_nop_e64
467; GFX10-WAVE32-NEXT:    v_nop_e64
468; GFX10-WAVE32-NEXT:    v_nop_e64
469; GFX10-WAVE32-NEXT:    v_nop_e64
470; GFX10-WAVE32-NEXT:    v_nop_e64
471; GFX10-WAVE32-NEXT:    v_nop_e64
472; GFX10-WAVE32-NEXT:    v_nop_e64
473; GFX10-WAVE32-NEXT:    v_nop_e64
474; GFX10-WAVE32-NEXT:    ;;#ASMEND
475; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
476; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
477; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
478; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_4
479; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
480; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
481; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
482; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
483; GFX10-WAVE32-NEXT:  .LBB7_4:
484; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
485; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
486; GFX10-WAVE32-NEXT:    s_endpgm
487; GFX10-WAVE32-NEXT:  .LBB7_5:
488;
489; GFX11-LABEL: test_kill_control_flow:
490; GFX11:       ; %bb.0: ; %entry
491; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
492; GFX11-NEXT:    s_cbranch_scc0 .LBB7_2
493; GFX11-NEXT:  ; %bb.1: ; %exit
494; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
495; GFX11-NEXT:    s_branch .LBB7_5
496; GFX11-NEXT:  .LBB7_2: ; %bb
497; GFX11-NEXT:    ;;#ASMSTART
498; GFX11-NEXT:    v_mov_b32_e64 v7, -1
499; GFX11-NEXT:    v_nop_e64
500; GFX11-NEXT:    v_nop_e64
501; GFX11-NEXT:    v_nop_e64
502; GFX11-NEXT:    v_nop_e64
503; GFX11-NEXT:    v_nop_e64
504; GFX11-NEXT:    v_nop_e64
505; GFX11-NEXT:    v_nop_e64
506; GFX11-NEXT:    v_nop_e64
507; GFX11-NEXT:    v_nop_e64
508; GFX11-NEXT:    v_nop_e64
509; GFX11-NEXT:    ;;#ASMEND
510; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
511; GFX11-NEXT:    s_mov_b64 s[2:3], exec
512; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
513; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
514; GFX11-NEXT:    s_cbranch_scc0 .LBB7_4
515; GFX11-NEXT:  ; %bb.3: ; %bb
516; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
517; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
518; GFX11-NEXT:    s_branch .LBB7_5
519; GFX11-NEXT:  .LBB7_4:
520; GFX11-NEXT:    s_mov_b64 exec, 0
521; GFX11-NEXT:    exp mrt0 off, off, off, off done
522; GFX11-NEXT:    s_endpgm
523; GFX11-NEXT:  .LBB7_5:
524entry:
525  %cmp = icmp eq i32 %arg, 0
526  br i1 %cmp, label %bb, label %exit
527
528bb:
  ; The asm result is pinned to v7 by the "={v7}" constraint and is the only
  ; input of the kill condition below.
529  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
530    v_nop_e64
531    v_nop_e64
532    v_nop_e64
533    v_nop_e64
534    v_nop_e64
535    v_nop_e64
536    v_nop_e64
537    v_nop_e64
538    v_nop_e64
539    v_nop_e64", "={v7}"()
540  %cmp.var = fcmp olt float %var, 0.0
541  ; TODO: We could do an early-exit here (the branch above is uniform!)
542  call void @llvm.amdgcn.kill(i1 %cmp.var)
543  br label %exit
544
545exit:
546  ret float 1.0
547}
548
; Kill inside a conditionally executed block, with work remaining after it.
; Block %bb runs only when %arg == 0. Its first asm statement (one v_mov plus
; eleven v_nop) produces the kill operand in v7. %live.across (v8) is defined
; before the kill and stored after it; %live.out (v9) is defined after the
; kill and reaches the phi in %exit. The expected output therefore keeps the
; v8 store and the v9 asm on the surviving path and places the early-exit
; sequence in its own block (.LBB8_4).
549define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
550; SI-LABEL: test_kill_control_flow_remainder:
551; SI:       ; %bb.0: ; %entry
552; SI-NEXT:    s_cmp_lg_u32 s0, 0
553; SI-NEXT:    v_mov_b32_e32 v9, 0
554; SI-NEXT:    s_cbranch_scc1 .LBB8_3
555; SI-NEXT:  ; %bb.1: ; %bb
556; SI-NEXT:    s_mov_b64 s[2:3], exec
557; SI-NEXT:    ;;#ASMSTART
558; SI-NEXT:    v_mov_b32_e64 v7, -1
559; SI-NEXT:    v_nop_e64
560; SI-NEXT:    v_nop_e64
561; SI-NEXT:    v_nop_e64
562; SI-NEXT:    v_nop_e64
563; SI-NEXT:    v_nop_e64
564; SI-NEXT:    v_nop_e64
565; SI-NEXT:    v_nop_e64
566; SI-NEXT:    v_nop_e64
567; SI-NEXT:    v_nop_e64
568; SI-NEXT:    v_nop_e64
569; SI-NEXT:    v_nop_e64
570; SI-NEXT:    ;;#ASMEND
571; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
572; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
573; SI-NEXT:    ;;#ASMSTART
574; SI-NEXT:    v_mov_b32_e64 v8, -1
575; SI-NEXT:    ;;#ASMEND
576; SI-NEXT:    s_cbranch_scc0 .LBB8_4
577; SI-NEXT:  ; %bb.2: ; %bb
578; SI-NEXT:    s_andn2_b64 exec, exec, vcc
579; SI-NEXT:    s_mov_b32 s3, 0xf000
580; SI-NEXT:    s_mov_b32 s2, -1
581; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0
582; SI-NEXT:    s_waitcnt vmcnt(0)
583; SI-NEXT:    ;;#ASMSTART
584; SI-NEXT:    v_mov_b32_e64 v9, -2
585; SI-NEXT:    ;;#ASMEND
586; SI-NEXT:  .LBB8_3: ; %exit
587; SI-NEXT:    s_mov_b32 s3, 0xf000
588; SI-NEXT:    s_mov_b32 s2, -1
589; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0
590; SI-NEXT:    s_endpgm
591; SI-NEXT:  .LBB8_4:
592; SI-NEXT:    s_mov_b64 exec, 0
593; SI-NEXT:    exp null off, off, off, off done vm
594; SI-NEXT:    s_endpgm
595;
596; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
597; GFX10-WAVE64:       ; %bb.0: ; %entry
598; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v9, 0
599; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
600; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_2
601; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
602; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
603; GFX10-WAVE64-NEXT:    s_endpgm
604; GFX10-WAVE64-NEXT:  .LBB8_2: ; %bb
605; GFX10-WAVE64-NEXT:    ;;#ASMSTART
606; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
607; GFX10-WAVE64-NEXT:    v_nop_e64
608; GFX10-WAVE64-NEXT:    v_nop_e64
609; GFX10-WAVE64-NEXT:    v_nop_e64
610; GFX10-WAVE64-NEXT:    v_nop_e64
611; GFX10-WAVE64-NEXT:    v_nop_e64
612; GFX10-WAVE64-NEXT:    v_nop_e64
613; GFX10-WAVE64-NEXT:    v_nop_e64
614; GFX10-WAVE64-NEXT:    v_nop_e64
615; GFX10-WAVE64-NEXT:    v_nop_e64
616; GFX10-WAVE64-NEXT:    v_nop_e64
617; GFX10-WAVE64-NEXT:    v_nop_e64
618; GFX10-WAVE64-NEXT:    ;;#ASMEND
619; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
620; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
621; GFX10-WAVE64-NEXT:    ;;#ASMSTART
622; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v8, -1
623; GFX10-WAVE64-NEXT:    ;;#ASMEND
624; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
625; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_4
626; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
627; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
628; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v8, off
629; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
630; GFX10-WAVE64-NEXT:    ;;#ASMSTART
631; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v9, -2
632; GFX10-WAVE64-NEXT:    ;;#ASMEND
633; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
634; GFX10-WAVE64-NEXT:    s_endpgm
635; GFX10-WAVE64-NEXT:  .LBB8_4:
636; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
637; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
638; GFX10-WAVE64-NEXT:    s_endpgm
639;
640; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
641; GFX10-WAVE32:       ; %bb.0: ; %entry
642; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v9, 0
643; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
644; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_2
645; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
646; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
647; GFX10-WAVE32-NEXT:    s_endpgm
648; GFX10-WAVE32-NEXT:  .LBB8_2: ; %bb
649; GFX10-WAVE32-NEXT:    ;;#ASMSTART
650; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
651; GFX10-WAVE32-NEXT:    v_nop_e64
652; GFX10-WAVE32-NEXT:    v_nop_e64
653; GFX10-WAVE32-NEXT:    v_nop_e64
654; GFX10-WAVE32-NEXT:    v_nop_e64
655; GFX10-WAVE32-NEXT:    v_nop_e64
656; GFX10-WAVE32-NEXT:    v_nop_e64
657; GFX10-WAVE32-NEXT:    v_nop_e64
658; GFX10-WAVE32-NEXT:    v_nop_e64
659; GFX10-WAVE32-NEXT:    v_nop_e64
660; GFX10-WAVE32-NEXT:    v_nop_e64
661; GFX10-WAVE32-NEXT:    v_nop_e64
662; GFX10-WAVE32-NEXT:    ;;#ASMEND
663; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
664; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
665; GFX10-WAVE32-NEXT:    ;;#ASMSTART
666; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v8, -1
667; GFX10-WAVE32-NEXT:    ;;#ASMEND
668; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
669; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_4
670; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
671; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
672; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v8, off
673; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
674; GFX10-WAVE32-NEXT:    ;;#ASMSTART
675; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v9, -2
676; GFX10-WAVE32-NEXT:    ;;#ASMEND
677; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
678; GFX10-WAVE32-NEXT:    s_endpgm
679; GFX10-WAVE32-NEXT:  .LBB8_4:
680; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
681; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
682; GFX10-WAVE32-NEXT:    s_endpgm
683;
684; GFX11-LABEL: test_kill_control_flow_remainder:
685; GFX11:       ; %bb.0: ; %entry
686; GFX11-NEXT:    v_mov_b32_e32 v9, 0
687; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
688; GFX11-NEXT:    s_cbranch_scc0 .LBB8_2
689; GFX11-NEXT:  ; %bb.1: ; %exit
690; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
691; GFX11-NEXT:    s_endpgm
692; GFX11-NEXT:  .LBB8_2: ; %bb
693; GFX11-NEXT:    ;;#ASMSTART
694; GFX11-NEXT:    v_mov_b32_e64 v7, -1
695; GFX11-NEXT:    v_nop_e64
696; GFX11-NEXT:    v_nop_e64
697; GFX11-NEXT:    v_nop_e64
698; GFX11-NEXT:    v_nop_e64
699; GFX11-NEXT:    v_nop_e64
700; GFX11-NEXT:    v_nop_e64
701; GFX11-NEXT:    v_nop_e64
702; GFX11-NEXT:    v_nop_e64
703; GFX11-NEXT:    v_nop_e64
704; GFX11-NEXT:    v_nop_e64
705; GFX11-NEXT:    v_nop_e64
706; GFX11-NEXT:    ;;#ASMEND
707; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
708; GFX11-NEXT:    s_mov_b64 s[2:3], exec
709; GFX11-NEXT:    ;;#ASMSTART
710; GFX11-NEXT:    v_mov_b32_e64 v8, -1
711; GFX11-NEXT:    ;;#ASMEND
712; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
713; GFX11-NEXT:    s_cbranch_scc0 .LBB8_4
714; GFX11-NEXT:  ; %bb.3: ; %bb
715; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
716; GFX11-NEXT:    global_store_b32 v[0:1], v8, off dlc
717; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
718; GFX11-NEXT:    ;;#ASMSTART
719; GFX11-NEXT:    v_mov_b32_e64 v9, -2
720; GFX11-NEXT:    ;;#ASMEND
721; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
722; GFX11-NEXT:    s_endpgm
723; GFX11-NEXT:  .LBB8_4:
724; GFX11-NEXT:    s_mov_b64 exec, 0
725; GFX11-NEXT:    exp mrt0 off, off, off, off done
726; GFX11-NEXT:    s_endpgm
727entry:
728  %cmp = icmp eq i32 %arg, 0
729  br i1 %cmp, label %bb, label %exit
730
731bb:
  ; Kill operand, pinned to v7 by the "={v7}" constraint.
732  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
733    v_nop_e64
734    v_nop_e64
735    v_nop_e64
736    v_nop_e64
737    v_nop_e64
738    v_nop_e64
739    v_nop_e64
740    v_nop_e64
741    v_nop_e64
742    v_nop_e64
743    v_nop_e64", "={v7}"()
  ; Defined before the kill and consumed by the volatile store after it.
744  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
745  %cmp.var = fcmp olt float %var, 0.0
746  ; TODO: We could do an early-exit here (the branch above is uniform!)
747  call void @llvm.amdgcn.kill(i1 %cmp.var)
748  store volatile float %live.across, ptr addrspace(1) undef
  ; Defined after the kill; flows into the phi in %exit.
749  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
750  br label %exit
751
752exit:
753  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
754  store float %phi, ptr addrspace(1) undef
755  ret void
756}
757
758define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
759; SI-LABEL: test_kill_control_flow_return:
760; SI:       ; %bb.0: ; %entry
761; SI-NEXT:    s_cmp_eq_u32 s0, 1
762; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
763; SI-NEXT:    s_mov_b64 s[2:3], exec
764; SI-NEXT:    s_andn2_b64 s[4:5], exec, s[4:5]
765; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
766; SI-NEXT:    s_cbranch_scc0 .LBB9_4
767; SI-NEXT:  ; %bb.1: ; %entry
768; SI-NEXT:    s_and_b64 exec, exec, s[2:3]
769; SI-NEXT:    s_cmp_lg_u32 s0, 0
770; SI-NEXT:    v_mov_b32_e32 v0, 0
771; SI-NEXT:    s_cbranch_scc0 .LBB9_3
772; SI-NEXT:  ; %bb.2: ; %exit
773; SI-NEXT:    s_branch .LBB9_5
774; SI-NEXT:  .LBB9_3: ; %bb
775; SI-NEXT:    ;;#ASMSTART
776; SI-NEXT:    v_mov_b32_e64 v7, -1
777; SI-NEXT:    v_nop_e64
778; SI-NEXT:    v_nop_e64
779; SI-NEXT:    v_nop_e64
780; SI-NEXT:    v_nop_e64
781; SI-NEXT:    v_nop_e64
782; SI-NEXT:    v_nop_e64
783; SI-NEXT:    v_nop_e64
784; SI-NEXT:    v_nop_e64
785; SI-NEXT:    v_nop_e64
786; SI-NEXT:    v_nop_e64
787; SI-NEXT:    ;;#ASMEND
788; SI-NEXT:    v_mov_b32_e32 v0, v7
789; SI-NEXT:    s_branch .LBB9_5
790; SI-NEXT:  .LBB9_4:
791; SI-NEXT:    s_mov_b64 exec, 0
792; SI-NEXT:    exp null off, off, off, off done vm
793; SI-NEXT:    s_endpgm
794; SI-NEXT:  .LBB9_5:
795;
796; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
797; GFX10-WAVE64:       ; %bb.0: ; %entry
798; GFX10-WAVE64-NEXT:    s_cmp_eq_u32 s0, 1
799; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
800; GFX10-WAVE64-NEXT:    s_cselect_b64 s[4:5], -1, 0
801; GFX10-WAVE64-NEXT:    s_andn2_b64 s[4:5], exec, s[4:5]
802; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
803; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_4
804; GFX10-WAVE64-NEXT:  ; %bb.1: ; %entry
805; GFX10-WAVE64-NEXT:    s_and_b64 exec, exec, s[2:3]
806; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 0
807; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
808; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_3
809; GFX10-WAVE64-NEXT:  ; %bb.2: ; %exit
810; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
811; GFX10-WAVE64-NEXT:  .LBB9_3: ; %bb
812; GFX10-WAVE64-NEXT:    ;;#ASMSTART
813; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
814; GFX10-WAVE64-NEXT:    v_nop_e64
815; GFX10-WAVE64-NEXT:    v_nop_e64
816; GFX10-WAVE64-NEXT:    v_nop_e64
817; GFX10-WAVE64-NEXT:    v_nop_e64
818; GFX10-WAVE64-NEXT:    v_nop_e64
819; GFX10-WAVE64-NEXT:    v_nop_e64
820; GFX10-WAVE64-NEXT:    v_nop_e64
821; GFX10-WAVE64-NEXT:    v_nop_e64
822; GFX10-WAVE64-NEXT:    v_nop_e64
823; GFX10-WAVE64-NEXT:    v_nop_e64
824; GFX10-WAVE64-NEXT:    ;;#ASMEND
825; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, v7
826; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
827; GFX10-WAVE64-NEXT:  .LBB9_4:
828; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
829; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
830; GFX10-WAVE64-NEXT:    s_endpgm
831; GFX10-WAVE64-NEXT:  .LBB9_5:
832;
833; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
834; GFX10-WAVE32:       ; %bb.0: ; %entry
835; GFX10-WAVE32-NEXT:    s_cmp_eq_u32 s0, 1
836; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
837; GFX10-WAVE32-NEXT:    s_cselect_b32 s2, -1, 0
838; GFX10-WAVE32-NEXT:    s_andn2_b32 s2, exec_lo, s2
839; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, s2
840; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_4
841; GFX10-WAVE32-NEXT:  ; %bb.1: ; %entry
842; GFX10-WAVE32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
843; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 0
844; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
845; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_3
846; GFX10-WAVE32-NEXT:  ; %bb.2: ; %exit
847; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
848; GFX10-WAVE32-NEXT:  .LBB9_3: ; %bb
849; GFX10-WAVE32-NEXT:    ;;#ASMSTART
850; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
851; GFX10-WAVE32-NEXT:    v_nop_e64
852; GFX10-WAVE32-NEXT:    v_nop_e64
853; GFX10-WAVE32-NEXT:    v_nop_e64
854; GFX10-WAVE32-NEXT:    v_nop_e64
855; GFX10-WAVE32-NEXT:    v_nop_e64
856; GFX10-WAVE32-NEXT:    v_nop_e64
857; GFX10-WAVE32-NEXT:    v_nop_e64
858; GFX10-WAVE32-NEXT:    v_nop_e64
859; GFX10-WAVE32-NEXT:    v_nop_e64
860; GFX10-WAVE32-NEXT:    v_nop_e64
861; GFX10-WAVE32-NEXT:    ;;#ASMEND
862; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, v7
863; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
864; GFX10-WAVE32-NEXT:  .LBB9_4:
865; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
866; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
867; GFX10-WAVE32-NEXT:    s_endpgm
868; GFX10-WAVE32-NEXT:  .LBB9_5:
869;
870; GFX11-LABEL: test_kill_control_flow_return:
871; GFX11:       ; %bb.0: ; %entry
872; GFX11-NEXT:    s_cmp_eq_u32 s0, 1
873; GFX11-NEXT:    s_mov_b64 s[2:3], exec
874; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
875; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
876; GFX11-NEXT:    s_and_not1_b64 s[4:5], exec, s[4:5]
877; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[4:5]
878; GFX11-NEXT:    s_cbranch_scc0 .LBB9_4
879; GFX11-NEXT:  ; %bb.1: ; %entry
880; GFX11-NEXT:    s_and_b64 exec, exec, s[2:3]
881; GFX11-NEXT:    v_mov_b32_e32 v0, 0
882; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
883; GFX11-NEXT:    s_cbranch_scc0 .LBB9_3
884; GFX11-NEXT:  ; %bb.2: ; %exit
885; GFX11-NEXT:    s_branch .LBB9_5
886; GFX11-NEXT:  .LBB9_3: ; %bb
887; GFX11-NEXT:    ;;#ASMSTART
888; GFX11-NEXT:    v_mov_b32_e64 v7, -1
889; GFX11-NEXT:    v_nop_e64
890; GFX11-NEXT:    v_nop_e64
891; GFX11-NEXT:    v_nop_e64
892; GFX11-NEXT:    v_nop_e64
893; GFX11-NEXT:    v_nop_e64
894; GFX11-NEXT:    v_nop_e64
895; GFX11-NEXT:    v_nop_e64
896; GFX11-NEXT:    v_nop_e64
897; GFX11-NEXT:    v_nop_e64
898; GFX11-NEXT:    v_nop_e64
899; GFX11-NEXT:    ;;#ASMEND
900; GFX11-NEXT:    v_mov_b32_e32 v0, v7
901; GFX11-NEXT:    s_branch .LBB9_5
902; GFX11-NEXT:  .LBB9_4:
903; GFX11-NEXT:    s_mov_b64 exec, 0
904; GFX11-NEXT:    exp mrt0 off, off, off, off done
905; GFX11-NEXT:    s_endpgm
906; GFX11-NEXT:  .LBB9_5:
907entry:
908  %kill = icmp eq i32 %arg, 1
909  %cmp = icmp eq i32 %arg, 0
910  call void @llvm.amdgcn.kill(i1 %kill)
911  br i1 %cmp, label %bb, label %exit
912
913bb:
914  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
915    v_nop_e64
916    v_nop_e64
917    v_nop_e64
918    v_nop_e64
919    v_nop_e64
920    v_nop_e64
921    v_nop_e64
922    v_nop_e64
923    v_nop_e64
924    v_nop_e64", "={v7}"()
925  br label %exit
926
927exit:
928  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
929  ret float %ret
930}
931
; Kill inside a loop that is entered under a divergent condition. %arg is a
; plain (non-inreg) argument; the checks below compare it in v0 and guard the
; loop with s_and_saveexec (v_cmpx on gfx1100). Each iteration runs an
; inline-asm blob that defines v7, calls llvm.amdgcn.kill on (v7 < 0.0), and
; loops again while a volatile load returns 0.
; Expected lowering for every run line: the kill updates a copy of exec taken
; at function entry with s_andn2 (s_and_not1 on gfx1100), and s_cbranch_scc0
; leaves for a trailing block that zeroes exec, issues a `done` export with
; all channels off, and ends the program.
932define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
933; SI-LABEL: test_kill_divergent_loop:
934; SI:       ; %bb.0: ; %entry
935; SI-NEXT:    s_mov_b64 s[0:1], exec
936; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
937; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
938; SI-NEXT:    s_xor_b64 s[4:5], exec, s[2:3]
939; SI-NEXT:    s_cbranch_execz .LBB10_4
940; SI-NEXT:  ; %bb.1: ; %bb.preheader
941; SI-NEXT:    s_mov_b32 s3, 0xf000
942; SI-NEXT:    s_mov_b32 s2, -1
943; SI-NEXT:  .LBB10_2: ; %bb
944; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
945; SI-NEXT:    ;;#ASMSTART
946; SI-NEXT:    v_mov_b32_e64 v7, -1
947; SI-NEXT:    v_nop_e64
948; SI-NEXT:    v_nop_e64
949; SI-NEXT:    v_nop_e64
950; SI-NEXT:    v_nop_e64
951; SI-NEXT:    v_nop_e64
952; SI-NEXT:    v_nop_e64
953; SI-NEXT:    v_nop_e64
954; SI-NEXT:    v_nop_e64
955; SI-NEXT:    v_nop_e64
956; SI-NEXT:    v_nop_e64
957; SI-NEXT:    ;;#ASMEND
958; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
959; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
960; SI-NEXT:    s_cbranch_scc0 .LBB10_5
961; SI-NEXT:  ; %bb.3: ; %bb
962; SI-NEXT:    ; in Loop: Header=BB10_2 Depth=1
963; SI-NEXT:    s_andn2_b64 exec, exec, vcc
964; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
965; SI-NEXT:    s_waitcnt vmcnt(0)
966; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
967; SI-NEXT:    s_cbranch_vccnz .LBB10_2
968; SI-NEXT:  .LBB10_4: ; %Flow1
969; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
970; SI-NEXT:    s_mov_b32 s3, 0xf000
971; SI-NEXT:    s_mov_b32 s2, -1
972; SI-NEXT:    v_mov_b32_e32 v0, 8
973; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
974; SI-NEXT:    s_waitcnt vmcnt(0)
975; SI-NEXT:    s_endpgm
976; SI-NEXT:  .LBB10_5:
977; SI-NEXT:    s_mov_b64 exec, 0
978; SI-NEXT:    exp null off, off, off, off done vm
979; SI-NEXT:    s_endpgm
980;
981; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
982; GFX10-WAVE64:       ; %bb.0: ; %entry
983; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
984; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
985; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
986; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
987; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB10_3
988; GFX10-WAVE64-NEXT:  .LBB10_1: ; %bb
989; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
990; GFX10-WAVE64-NEXT:    ;;#ASMSTART
991; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
992; GFX10-WAVE64-NEXT:    v_nop_e64
993; GFX10-WAVE64-NEXT:    v_nop_e64
994; GFX10-WAVE64-NEXT:    v_nop_e64
995; GFX10-WAVE64-NEXT:    v_nop_e64
996; GFX10-WAVE64-NEXT:    v_nop_e64
997; GFX10-WAVE64-NEXT:    v_nop_e64
998; GFX10-WAVE64-NEXT:    v_nop_e64
999; GFX10-WAVE64-NEXT:    v_nop_e64
1000; GFX10-WAVE64-NEXT:    v_nop_e64
1001; GFX10-WAVE64-NEXT:    v_nop_e64
1002; GFX10-WAVE64-NEXT:    ;;#ASMEND
1003; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
1004; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1005; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB10_4
1006; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb
1007; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1008; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1009; GFX10-WAVE64-NEXT:    global_load_dword v0, v[0:1], off glc dlc
1010; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1011; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1012; GFX10-WAVE64-NEXT:    s_cbranch_vccnz .LBB10_1
1013; GFX10-WAVE64-NEXT:  .LBB10_3: ; %Flow1
1014; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
1015; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 8
1016; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1017; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1018; GFX10-WAVE64-NEXT:    s_endpgm
1019; GFX10-WAVE64-NEXT:  .LBB10_4:
1020; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1021; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1022; GFX10-WAVE64-NEXT:    s_endpgm
1023;
1024; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
1025; GFX10-WAVE32:       ; %bb.0: ; %entry
1026; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1027; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1028; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1029; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1030; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB10_3
1031; GFX10-WAVE32-NEXT:  .LBB10_1: ; %bb
1032; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1033; GFX10-WAVE32-NEXT:    ;;#ASMSTART
1034; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
1035; GFX10-WAVE32-NEXT:    v_nop_e64
1036; GFX10-WAVE32-NEXT:    v_nop_e64
1037; GFX10-WAVE32-NEXT:    v_nop_e64
1038; GFX10-WAVE32-NEXT:    v_nop_e64
1039; GFX10-WAVE32-NEXT:    v_nop_e64
1040; GFX10-WAVE32-NEXT:    v_nop_e64
1041; GFX10-WAVE32-NEXT:    v_nop_e64
1042; GFX10-WAVE32-NEXT:    v_nop_e64
1043; GFX10-WAVE32-NEXT:    v_nop_e64
1044; GFX10-WAVE32-NEXT:    v_nop_e64
1045; GFX10-WAVE32-NEXT:    ;;#ASMEND
1046; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
1047; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
1048; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB10_4
1049; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb
1050; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1051; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1052; GFX10-WAVE32-NEXT:    global_load_dword v0, v[0:1], off glc dlc
1053; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1054; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1055; GFX10-WAVE32-NEXT:    s_cbranch_vccnz .LBB10_1
1056; GFX10-WAVE32-NEXT:  .LBB10_3: ; %Flow1
1057; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1058; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 8
1059; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1060; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1061; GFX10-WAVE32-NEXT:    s_endpgm
1062; GFX10-WAVE32-NEXT:  .LBB10_4:
1063; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1064; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1065; GFX10-WAVE32-NEXT:    s_endpgm
1066;
1067; GFX11-LABEL: test_kill_divergent_loop:
1068; GFX11:       ; %bb.0: ; %entry
1069; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1070; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1071; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
1072; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1073; GFX11-NEXT:    s_cbranch_execz .LBB10_3
1074; GFX11-NEXT:  .LBB10_1: ; %bb
1075; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1076; GFX11-NEXT:    ;;#ASMSTART
1077; GFX11-NEXT:    v_mov_b32_e64 v7, -1
1078; GFX11-NEXT:    v_nop_e64
1079; GFX11-NEXT:    v_nop_e64
1080; GFX11-NEXT:    v_nop_e64
1081; GFX11-NEXT:    v_nop_e64
1082; GFX11-NEXT:    v_nop_e64
1083; GFX11-NEXT:    v_nop_e64
1084; GFX11-NEXT:    v_nop_e64
1085; GFX11-NEXT:    v_nop_e64
1086; GFX11-NEXT:    v_nop_e64
1087; GFX11-NEXT:    v_nop_e64
1088; GFX11-NEXT:    ;;#ASMEND
1089; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
1090; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
1091; GFX11-NEXT:    s_cbranch_scc0 .LBB10_4
1092; GFX11-NEXT:  ; %bb.2: ; %bb
1093; GFX11-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1094; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1095; GFX11-NEXT:    global_load_b32 v0, v[0:1], off glc dlc
1096; GFX11-NEXT:    s_waitcnt vmcnt(0)
1097; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1098; GFX11-NEXT:    s_cbranch_vccnz .LBB10_1
1099; GFX11-NEXT:  .LBB10_3: ; %Flow1
1100; GFX11-NEXT:    s_or_b64 exec, exec, s[2:3]
1101; GFX11-NEXT:    v_mov_b32_e32 v0, 8
1102; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1103; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1104; GFX11-NEXT:    s_endpgm
1105; GFX11-NEXT:  .LBB10_4:
1106; GFX11-NEXT:    s_mov_b64 exec, 0
1107; GFX11-NEXT:    exp mrt0 off, off, off, off done
1108; GFX11-NEXT:    s_endpgm
1109entry:
1110  %cmp = icmp eq i32 %arg, 0
1111  br i1 %cmp, label %bb, label %exit
1112
1113bb:
1114  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
1115    v_nop_e64
1116    v_nop_e64
1117    v_nop_e64
1118    v_nop_e64
1119    v_nop_e64
1120    v_nop_e64
1121    v_nop_e64
1122    v_nop_e64
1123    v_nop_e64
1124    v_nop_e64", "={v7}"()
; Lanes for which %cmp.var is false (v7 not < 0.0) are removed by the kill.
1125  %cmp.var = fcmp olt float %var, 0.0
1126  call void @llvm.amdgcn.kill(i1 %cmp.var)
; The back-edge condition comes from a volatile load through an undef
; global (addrspace 1) pointer.
1127  %vgpr = load volatile i32, ptr addrspace(1) undef
1128  %loop.cond = icmp eq i32 %vgpr, 0
1129  br i1 %loop.cond, label %bb, label %exit
1130
1131exit:
1132  store volatile i32 8, ptr addrspace(1) undef
1133  ret void
1134}
1135
1136; bug 28550
; The phi in %phibb reads %tmp2 on the edge that comes straight from %bb;
; %tmp2 is defined in %bb before the call to llvm.amdgcn.kill. The other
; incoming value (4.0) arrives via %bb8. The branch out of %bb is on undef.
; The checks expect the kill to become s_andn2 on exec (s_and_not1 on gfx1100)
; followed by s_cbranch_scc0 to a trailing early-exit block, and v0 to carry
; the phi value into the v_cmp_eq_f32 in %phibb.
1137define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
1138; SI-LABEL: phi_use_def_before_kill:
1139; SI:       ; %bb.0: ; %bb
1140; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
1141; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1142; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1143; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1144; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1145; SI-NEXT:    s_cbranch_scc0 .LBB11_6
1146; SI-NEXT:  ; %bb.1: ; %bb
1147; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1148; SI-NEXT:    s_cbranch_scc0 .LBB11_3
1149; SI-NEXT:  ; %bb.2: ; %bb8
1150; SI-NEXT:    s_mov_b32 s3, 0xf000
1151; SI-NEXT:    s_mov_b32 s2, -1
1152; SI-NEXT:    v_mov_b32_e32 v0, 8
1153; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1154; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1155; SI-NEXT:    v_mov_b32_e32 v0, 4.0
1156; SI-NEXT:  .LBB11_3: ; %phibb
1157; SI-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1158; SI-NEXT:    s_cbranch_vccz .LBB11_5
1159; SI-NEXT:  ; %bb.4: ; %bb10
1160; SI-NEXT:    s_mov_b32 s3, 0xf000
1161; SI-NEXT:    s_mov_b32 s2, -1
1162; SI-NEXT:    v_mov_b32_e32 v0, 9
1163; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1164; SI-NEXT:    s_waitcnt vmcnt(0)
1165; SI-NEXT:  .LBB11_5: ; %end
1166; SI-NEXT:    s_endpgm
1167; SI-NEXT:  .LBB11_6:
1168; SI-NEXT:    s_mov_b64 exec, 0
1169; SI-NEXT:    exp null off, off, off, off done vm
1170; SI-NEXT:    s_endpgm
1171;
1172; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
1173; GFX10-WAVE64:       ; %bb.0: ; %bb
1174; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
1175; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1176; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1177; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1178; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1179; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_6
1180; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
1181; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1182; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_3
1183; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb8
1184; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v1, 8
1185; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 4.0
1186; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v1, off
1187; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1188; GFX10-WAVE64-NEXT:  .LBB11_3: ; %phibb
1189; GFX10-WAVE64-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1190; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB11_5
1191; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb10
1192; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1193; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1194; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1195; GFX10-WAVE64-NEXT:  .LBB11_5: ; %end
1196; GFX10-WAVE64-NEXT:    s_endpgm
1197; GFX10-WAVE64-NEXT:  .LBB11_6:
1198; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1199; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1200; GFX10-WAVE64-NEXT:    s_endpgm
1201;
1202; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
1203; GFX10-WAVE32:       ; %bb.0: ; %bb
1204; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
1205; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
1206; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
1207; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
1208; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1209; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_6
1210; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
1211; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1212; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_3
1213; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb8
1214; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v1, 8
1215; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 4.0
1216; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v1, off
1217; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1218; GFX10-WAVE32-NEXT:  .LBB11_3: ; %phibb
1219; GFX10-WAVE32-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
1220; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB11_5
1221; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb10
1222; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1223; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1224; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1225; GFX10-WAVE32-NEXT:  .LBB11_5: ; %end
1226; GFX10-WAVE32-NEXT:    s_endpgm
1227; GFX10-WAVE32-NEXT:  .LBB11_6:
1228; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1229; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1230; GFX10-WAVE32-NEXT:    s_endpgm
1231;
1232; GFX11-LABEL: phi_use_def_before_kill:
1233; GFX11:       ; %bb.0: ; %bb
1234; GFX11-NEXT:    v_add_f32_e64 v1, s0, 1.0
1235; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1236; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1237; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1238; GFX11-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1239; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1240; GFX11-NEXT:    s_cbranch_scc0 .LBB11_6
1241; GFX11-NEXT:  ; %bb.1: ; %bb
1242; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1243; GFX11-NEXT:    s_cbranch_scc0 .LBB11_3
1244; GFX11-NEXT:  ; %bb.2: ; %bb8
1245; GFX11-NEXT:    v_mov_b32_e32 v1, 8
1246; GFX11-NEXT:    v_mov_b32_e32 v0, 4.0
1247; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
1248; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1249; GFX11-NEXT:  .LBB11_3: ; %phibb
1250; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1251; GFX11-NEXT:    s_cbranch_vccz .LBB11_5
1252; GFX11-NEXT:  ; %bb.4: ; %bb10
1253; GFX11-NEXT:    v_mov_b32_e32 v0, 9
1254; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1255; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1256; GFX11-NEXT:  .LBB11_5: ; %end
1257; GFX11-NEXT:    s_endpgm
1258; GFX11-NEXT:  .LBB11_6:
1259; GFX11-NEXT:    s_mov_b64 exec, 0
1260; GFX11-NEXT:    exp mrt0 off, off, off, off done
1261; GFX11-NEXT:    s_endpgm
1262bb:
; %tmp2 is -1.0 when 0.0 < %x + 1.0 and 0.0 otherwise; it feeds both the kill
; condition and the phi in %phibb.
1263  %tmp = fadd float %x, 1.000000e+00
1264  %tmp1 = fcmp olt float 0.000000e+00, %tmp
1265  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
1266  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
1267  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
1268  br i1 undef, label %phibb, label %bb8
1269
1270phibb:
; Use of the pre-kill definition %tmp2 after the kill.
1271  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
1272  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
1273  br i1 %tmp6, label %bb10, label %end
1274
1275bb8:
1276  store volatile i32 8, ptr addrspace(1) undef
1277  br label %phibb
1278
1279bb10:
1280  store volatile i32 9, ptr addrspace(1) undef
1281  br label %end
1282
1283end:
1284  ret void
1285}
1286
; The kill sits in %bb6, which ends in `unreachable` and therefore has no
; successor blocks; its condition is the constant false. %bb5 is also
; `unreachable`, and %bb4 branches on the constant true.
; In the expected output %bb6 does s_andn2 of a copy of exec with exec itself
; (s_and_not1 on gfx1100), branches on scc0 to the trailing early-exit block,
; and otherwise zeroes exec and falls into %bb3. No instructions are expected
; under the %bb5 block comment; it is followed directly by the early-exit
; block.
1287define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
1288; SI-LABEL: no_skip_no_successors:
1289; SI:       ; %bb.0: ; %bb
1290; SI-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1291; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
1292; SI-NEXT:    s_cbranch_vccz .LBB12_3
1293; SI-NEXT:  ; %bb.1: ; %bb6
1294; SI-NEXT:    s_mov_b64 s[2:3], exec
1295; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1296; SI-NEXT:    s_cbranch_scc0 .LBB12_5
1297; SI-NEXT:  ; %bb.2: ; %bb6
1298; SI-NEXT:    s_mov_b64 exec, 0
1299; SI-NEXT:  .LBB12_3: ; %bb3
1300; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7ae148
1301; SI-NEXT:    v_cmp_nge_f32_e32 vcc, s0, v0
1302; SI-NEXT:    s_and_b64 vcc, exec, vcc
1303; SI-NEXT:  ; %bb.4: ; %bb5
1304; SI-NEXT:  .LBB12_5:
1305; SI-NEXT:    s_mov_b64 exec, 0
1306; SI-NEXT:    exp null off, off, off, off done vm
1307; SI-NEXT:    s_endpgm
1308;
1309; GFX10-WAVE64-LABEL: no_skip_no_successors:
1310; GFX10-WAVE64:       ; %bb.0: ; %bb
1311; GFX10-WAVE64-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1312; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[4:5]
1313; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB12_3
1314; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb6
1315; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1316; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1317; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB12_5
1318; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb6
1319; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1320; GFX10-WAVE64-NEXT:  .LBB12_3: ; %bb3
1321; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1322; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[0:1]
1323; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb5
1324; GFX10-WAVE64-NEXT:  .LBB12_5:
1325; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1326; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1327; GFX10-WAVE64-NEXT:    s_endpgm
1328;
1329; GFX10-WAVE32-LABEL: no_skip_no_successors:
1330; GFX10-WAVE32:       ; %bb.0: ; %bb
1331; GFX10-WAVE32-NEXT:    v_cmp_nge_f32_e64 s1, s1, 0
1332; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s1
1333; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB12_3
1334; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb6
1335; GFX10-WAVE32-NEXT:    s_mov_b32 s2, exec_lo
1336; GFX10-WAVE32-NEXT:    s_andn2_b32 s2, s2, exec_lo
1337; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB12_5
1338; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb6
1339; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1340; GFX10-WAVE32-NEXT:  .LBB12_3: ; %bb3
1341; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
1342; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
1343; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb5
1344; GFX10-WAVE32-NEXT:  .LBB12_5:
1345; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1346; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1347; GFX10-WAVE32-NEXT:    s_endpgm
1348;
1349; GFX11-LABEL: no_skip_no_successors:
1350; GFX11:       ; %bb.0: ; %bb
1351; GFX11-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1352; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1353; GFX11-NEXT:    s_and_b64 vcc, exec, s[4:5]
1354; GFX11-NEXT:    s_cbranch_vccz .LBB12_3
1355; GFX11-NEXT:  ; %bb.1: ; %bb6
1356; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1357; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1358; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], exec
1359; GFX11-NEXT:    s_cbranch_scc0 .LBB12_5
1360; GFX11-NEXT:  ; %bb.2: ; %bb6
1361; GFX11-NEXT:    s_mov_b64 exec, 0
1362; GFX11-NEXT:  .LBB12_3: ; %bb3
1363; GFX11-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1364; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1365; GFX11-NEXT:    s_and_b64 vcc, exec, s[0:1]
1366; GFX11-NEXT:  ; %bb.4: ; %bb5
1367; GFX11-NEXT:  .LBB12_5:
1368; GFX11-NEXT:    s_mov_b64 exec, 0
1369; GFX11-NEXT:    exp mrt0 off, off, off, off done
1370; GFX11-NEXT:    s_endpgm
1371bb:
1372  %tmp = fcmp ult float %arg1, 0.000000e+00
1373  br i1 %tmp, label %bb6, label %bb3
1374
1375bb3:                                              ; preds = %bb
1376  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
1377  br i1 %tmp2, label %bb5, label %bb4
1378
1379bb4:                                              ; preds = %bb3
1380  br i1 true, label %bb5, label %bb7
1381
1382bb5:                                              ; preds = %bb4, %bb3
1383  unreachable
1384
1385bb6:                                              ; preds = %bb
; Constant-false kill condition, immediately followed by `unreachable`.
1386  call void @llvm.amdgcn.kill(i1 false)
1387  unreachable
1388
1389bb7:                                              ; preds = %bb4
1390  ret void
1391}
1392
; A conditional kill in %bb3 followed, after the join in %bb4, by an image
; sample and a second conditional branch on the sampled value.
; The checks expect s_wqm on exec right after the entry copy of exec is saved,
; the kill to update that saved copy (s[0:1] / s0) and branch on scc0 to the
; trailing early-exit block, and the later branch to be lowered with
; s_and_saveexec (v_cmpx on gfx1100) plus s_cbranch_execz.
1393define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
1394; SI-LABEL: if_after_kill_block:
1395; SI:       ; %bb.0: ; %bb
1396; SI-NEXT:    s_mov_b64 s[0:1], exec
1397; SI-NEXT:    s_wqm_b64 exec, exec
1398; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1399; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1400; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1401; SI-NEXT:    s_cbranch_execz .LBB13_3
1402; SI-NEXT:  ; %bb.1: ; %bb3
1403; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1404; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1405; SI-NEXT:    s_cbranch_scc0 .LBB13_6
1406; SI-NEXT:  ; %bb.2: ; %bb3
1407; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1408; SI-NEXT:  .LBB13_3: ; %bb4
1409; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
1410; SI-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
1411; SI-NEXT:    s_waitcnt vmcnt(0)
1412; SI-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1413; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1414; SI-NEXT:    s_cbranch_execz .LBB13_5
1415; SI-NEXT:  ; %bb.4: ; %bb8
1416; SI-NEXT:    s_mov_b32 s3, 0xf000
1417; SI-NEXT:    s_mov_b32 s2, -1
1418; SI-NEXT:    v_mov_b32_e32 v0, 9
1419; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1420; SI-NEXT:    s_waitcnt vmcnt(0)
1421; SI-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1422; SI-NEXT:    s_endpgm
1423; SI-NEXT:  .LBB13_6:
1424; SI-NEXT:    s_mov_b64 exec, 0
1425; SI-NEXT:    exp null off, off, off, off done vm
1426; SI-NEXT:    s_endpgm
1427;
1428; GFX10-WAVE64-LABEL: if_after_kill_block:
1429; GFX10-WAVE64:       ; %bb.0: ; %bb
1430; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
1431; GFX10-WAVE64-NEXT:    s_wqm_b64 exec, exec
1432; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1433; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1434; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1435; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_3
1436; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb3
1437; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1438; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1439; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB13_6
1440; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb3
1441; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1442; GFX10-WAVE64-NEXT:  .LBB13_3: ; %bb4
1443; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
1444; GFX10-WAVE64-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1445; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1446; GFX10-WAVE64-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1447; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1448; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_5
1449; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb8
1450; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1451; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1452; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1453; GFX10-WAVE64-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1454; GFX10-WAVE64-NEXT:    s_endpgm
1455; GFX10-WAVE64-NEXT:  .LBB13_6:
1456; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1457; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1458; GFX10-WAVE64-NEXT:    s_endpgm
1459;
1460; GFX10-WAVE32-LABEL: if_after_kill_block:
1461; GFX10-WAVE32:       ; %bb.0: ; %bb
1462; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1463; GFX10-WAVE32-NEXT:    s_wqm_b32 exec_lo, exec_lo
1464; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 0, v1
1465; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1466; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1467; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_3
1468; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb3
1469; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
1470; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
1471; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB13_6
1472; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb3
1473; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1474; GFX10-WAVE32-NEXT:  .LBB13_3: ; %bb4
1475; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1476; GFX10-WAVE32-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1477; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1478; GFX10-WAVE32-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0, v0
1479; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1480; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_5
1481; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb8
1482; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1483; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1484; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1485; GFX10-WAVE32-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1486; GFX10-WAVE32-NEXT:    s_endpgm
1487; GFX10-WAVE32-NEXT:  .LBB13_6:
1488; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1489; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1490; GFX10-WAVE32-NEXT:    s_endpgm
1491;
1492; GFX11-LABEL: if_after_kill_block:
1493; GFX11:       ; %bb.0: ; %bb
1494; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1495; GFX11-NEXT:    s_wqm_b64 exec, exec
1496; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1497; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1498; GFX11-NEXT:    v_cmpx_nle_f32_e32 0, v1
1499; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1500; GFX11-NEXT:    s_cbranch_execz .LBB13_3
1501; GFX11-NEXT:  ; %bb.1: ; %bb3
1502; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1503; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
1504; GFX11-NEXT:    s_cbranch_scc0 .LBB13_6
1505; GFX11-NEXT:  ; %bb.2: ; %bb3
1506; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1507; GFX11-NEXT:  .LBB13_3: ; %bb4
1508; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1509; GFX11-NEXT:    s_or_b64 exec, exec, s[2:3]
1510; GFX11-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1511; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1512; GFX11-NEXT:    s_waitcnt vmcnt(0)
1513; GFX11-NEXT:    v_cmpx_neq_f32_e32 0, v0
1514; GFX11-NEXT:    s_cbranch_execz .LBB13_5
1515; GFX11-NEXT:  ; %bb.4: ; %bb8
1516; GFX11-NEXT:    v_mov_b32_e32 v0, 9
1517; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1518; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1519; GFX11-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1520; GFX11-NEXT:    s_endpgm
1521; GFX11-NEXT:  .LBB13_6:
1522; GFX11-NEXT:    s_mov_b64 exec, 0
1523; GFX11-NEXT:    exp mrt0 off, off, off, off done
1524; GFX11-NEXT:    s_endpgm
1525bb:
1526  %tmp = fcmp ult float %arg1, 0.000000e+00
1527  br i1 %tmp, label %bb3, label %bb4
1528
1529bb3:                                              ; preds = %bb
; Lanes for which %cmp.arg is false (%arg not < 0.0) are removed by the kill.
1530  %cmp.arg = fcmp olt float %arg, 0.0
1531  call void @llvm.amdgcn.kill(i1 %cmp.arg)
1532  br label %bb4
1533
1534bb4:                                              ; preds = %bb3, %bb
1535  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1536  %tmp6 = extractelement <4 x float> %tmp5, i32 0
1537  %tmp7 = fcmp une float %tmp6, 0.000000e+00
1538  br i1 %tmp7, label %bb8, label %bb9
1539
1540bb8:                                              ; preds = %bb4
1541  store volatile i32 9, ptr addrspace(1) undef
1542  ret void
1543
1544bb9:                                              ; preds = %bb4
1545  ret void
1546}
1547
; cbranch_kill: an unconditional kill (llvm.amdgcn.kill(i1 false)) sits in one
; arm of a branch whose condition is computed from an image sample result, and
; both arms rejoin in %export.
;
; What the check lines below pin down, for every run configuration:
;   * a copy of exec is taken at function entry (s[0:1], or s0 for wave32);
;   * %kill clears the current exec bits from that copy with
;     s_andn2 / s_and_not1 and branches on scc0 to a tail block that zeroes
;     exec, issues an export with all channels off and ends the program;
;   * otherwise exec is set to 0 and execution falls through to %Flow.
; The tahiti and gfx1010 runs print the tail export as "exp null ... done vm";
; the gfx1100 run prints "exp mrt0 off, off, off, off done".
1548define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
1549; SI-LABEL: cbranch_kill:
1550; SI:       ; %bb.0: ; %.entry
1551; SI-NEXT:    s_mov_b64 s[0:1], exec
1552; SI-NEXT:    v_mov_b32_e32 v4, 0
1553; SI-NEXT:    v_mov_b32_e32 v2, v1
1554; SI-NEXT:    v_mov_b32_e32 v3, v1
1555; SI-NEXT:    image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da
1556; SI-NEXT:    s_waitcnt vmcnt(0)
1557; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1558; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1559; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1560; SI-NEXT:    s_cbranch_execz .LBB14_3
1561; SI-NEXT:  ; %bb.1: ; %kill
1562; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1563; SI-NEXT:    ; implicit-def: $vgpr0
1564; SI-NEXT:    ; implicit-def: $vgpr1
1565; SI-NEXT:    s_cbranch_scc0 .LBB14_6
1566; SI-NEXT:  ; %bb.2: ; %kill
1567; SI-NEXT:    s_mov_b64 exec, 0
1568; SI-NEXT:  .LBB14_3: ; %Flow
1569; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1570; SI-NEXT:    ; implicit-def: $vgpr2
1571; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
1572; SI-NEXT:  ; %bb.4: ; %live
1573; SI-NEXT:    v_mul_f32_e32 v2, v0, v1
1574; SI-NEXT:  ; %bb.5: ; %export
1575; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1576; SI-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1577; SI-NEXT:    s_endpgm
1578; SI-NEXT:  .LBB14_6:
1579; SI-NEXT:    s_mov_b64 exec, 0
1580; SI-NEXT:    exp null off, off, off, off done vm
1581; SI-NEXT:    s_endpgm
1582;
1583; GFX10-WAVE64-LABEL: cbranch_kill:
1584; GFX10-WAVE64:       ; %bb.0: ; %.entry
1585; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, 0
1586; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
1587; GFX10-WAVE64-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1588; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1589; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1590; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1591; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1592; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB14_3
1593; GFX10-WAVE64-NEXT:  ; %bb.1: ; %kill
1594; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1595; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr0
1596; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr1
1597; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB14_6
1598; GFX10-WAVE64-NEXT:  ; %bb.2: ; %kill
1599; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1600; GFX10-WAVE64-NEXT:  .LBB14_3: ; %Flow
1601; GFX10-WAVE64-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1602; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr2
1603; GFX10-WAVE64-NEXT:    s_xor_b64 exec, exec, s[0:1]
1604; GFX10-WAVE64-NEXT:  ; %bb.4: ; %live
1605; GFX10-WAVE64-NEXT:    v_mul_f32_e32 v2, v0, v1
1606; GFX10-WAVE64-NEXT:  ; %bb.5: ; %export
1607; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1608; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1609; GFX10-WAVE64-NEXT:    s_endpgm
1610; GFX10-WAVE64-NEXT:  .LBB14_6:
1611; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1612; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1613; GFX10-WAVE64-NEXT:    s_endpgm
1614;
1615; GFX10-WAVE32-LABEL: cbranch_kill:
1616; GFX10-WAVE32:       ; %bb.0: ; %.entry
1617; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, 0
1618; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1619; GFX10-WAVE32-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1620; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1621; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
1622; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1623; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1624; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB14_3
1625; GFX10-WAVE32-NEXT:  ; %bb.1: ; %kill
1626; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
1627; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr0
1628; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr1
1629; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB14_6
1630; GFX10-WAVE32-NEXT:  ; %bb.2: ; %kill
1631; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1632; GFX10-WAVE32-NEXT:  .LBB14_3: ; %Flow
1633; GFX10-WAVE32-NEXT:    s_or_saveexec_b32 s0, s1
1634; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr2
1635; GFX10-WAVE32-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
1636; GFX10-WAVE32-NEXT:  ; %bb.4: ; %live
1637; GFX10-WAVE32-NEXT:    v_mul_f32_e32 v2, v0, v1
1638; GFX10-WAVE32-NEXT:  ; %bb.5: ; %export
1639; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1640; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1641; GFX10-WAVE32-NEXT:    s_endpgm
1642; GFX10-WAVE32-NEXT:  .LBB14_6:
1643; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1644; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1645; GFX10-WAVE32-NEXT:    s_endpgm
1646;
1647; GFX11-LABEL: cbranch_kill:
1648; GFX11:       ; %bb.0: ; %.entry
1649; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1650; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1651; GFX11-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1652; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1653; GFX11-NEXT:    s_waitcnt vmcnt(0)
1654; GFX11-NEXT:    v_cmpx_ge_f32_e32 0, v1
1655; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1656; GFX11-NEXT:    s_cbranch_execz .LBB14_3
1657; GFX11-NEXT:  ; %bb.1: ; %kill
1658; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
1659; GFX11-NEXT:    ; implicit-def: $vgpr0
1660; GFX11-NEXT:    ; implicit-def: $vgpr1
1661; GFX11-NEXT:    s_cbranch_scc0 .LBB14_6
1662; GFX11-NEXT:  ; %bb.2: ; %kill
1663; GFX11-NEXT:    s_mov_b64 exec, 0
1664; GFX11-NEXT:  .LBB14_3: ; %Flow
1665; GFX11-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1666; GFX11-NEXT:    ; implicit-def: $vgpr2
1667; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1668; GFX11-NEXT:    s_xor_b64 exec, exec, s[0:1]
1669; GFX11-NEXT:  ; %bb.4: ; %live
1670; GFX11-NEXT:    v_mul_f32_e32 v2, v0, v1
1671; GFX11-NEXT:  ; %bb.5: ; %export
1672; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1673; GFX11-NEXT:    exp mrt0 v2, v2, v2, v2 done
1674; GFX11-NEXT:    s_endpgm
1675; GFX11-NEXT:  .LBB14_6:
1676; GFX11-NEXT:    s_mov_b64 exec, 0
1677; GFX11-NEXT:    exp mrt0 off, off, off, off done
1678; GFX11-NEXT:    s_endpgm
1679.entry:
  ; %val1 supplies all three coordinates and the LOD is the constant 0.0; the
  ; resource and sampler descriptors are undef.
1680  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  ; ugt is "unordered or greater than": lanes with sample > 0.0 or a NaN sample
  ; go to %live. The checks show the complementary ordered compare
  ; (0 >= sample) selecting the lanes that enter %kill.
1681  %cond0 = fcmp ugt float %sample, 0.000000e+00
1682  br i1 %cond0, label %live, label %kill
1683
1684kill:
  ; A constant-false condition: every lane that reaches this block is killed.
1685  call void @llvm.amdgcn.kill(i1 false)
1686  br label %export
1687
1688live:
1689  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
1690  br label %export
1691
1692export:
  ; Only the %live path contributes a defined value; the %kill path feeds undef.
1693  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
  ; Target 0 with enable mask 15 and both trailing i1 flags set; the checks
  ; print this as "exp mrt0 v2, v2, v2, v2 done" (plus "vm" before gfx1100).
1694  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 true, i1 true) #3
1695  ret void
1696}
1697
1698
; complex_loop: an unconditional kill (llvm.amdgcn.kill(i1 false)) inside a
; loop body.
;
; IR shape: the loop is entered only when the inreg argument %cmpa is > 0;
; %ctr counts up from 0; lanes with %ctr >u %cmpb pass through %kill before
; reaching %latch; the loop continues while %ctr.next <s %cmpc. The exit block
; exports the final counter (or -1 when the loop was never entered).
;
; What the check lines below pin down, for every run configuration:
;   * a copy of exec is taken in %.lr.ph, before the loop;
;   * %kill clears the current exec bits from that copy and, on scc0, leaves
;     the loop for a tail block (.LBB15_8) that zeroes exec, issues an export
;     with all channels off and ends the program;
;   * otherwise exec is set to 0 and control branches to %latch.
1699define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
1700; SI-LABEL: complex_loop:
1701; SI:       ; %bb.0: ; %.entry
1702; SI-NEXT:    s_cmp_lt_i32 s0, 1
1703; SI-NEXT:    s_cbranch_scc1 .LBB15_7
1704; SI-NEXT:  ; %bb.1: ; %.lr.ph
1705; SI-NEXT:    s_mov_b64 s[2:3], exec
1706; SI-NEXT:    s_mov_b32 s6, 0
1707; SI-NEXT:    s_mov_b64 s[0:1], 0
1708; SI-NEXT:    s_branch .LBB15_3
1709; SI-NEXT:  .LBB15_2: ; %latch
1710; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1711; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1712; SI-NEXT:    s_add_i32 s6, s6, 1
1713; SI-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1714; SI-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1715; SI-NEXT:    v_mov_b32_e32 v2, s6
1716; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1717; SI-NEXT:    s_cbranch_execz .LBB15_6
1718; SI-NEXT:  .LBB15_3: ; %hdr
1719; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
1720; SI-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1721; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1722; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1723; SI-NEXT:    s_cbranch_execz .LBB15_2
1724; SI-NEXT:  ; %bb.4: ; %kill
1725; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1726; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1727; SI-NEXT:    s_cbranch_scc0 .LBB15_8
1728; SI-NEXT:  ; %bb.5: ; %kill
1729; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1730; SI-NEXT:    s_mov_b64 exec, 0
1731; SI-NEXT:    s_branch .LBB15_2
1732; SI-NEXT:  .LBB15_6: ; %Flow
1733; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1734; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1735; SI-NEXT:    s_endpgm
1736; SI-NEXT:  .LBB15_7:
1737; SI-NEXT:    v_mov_b32_e32 v2, -1
1738; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1739; SI-NEXT:    s_endpgm
1740; SI-NEXT:  .LBB15_8:
1741; SI-NEXT:    s_mov_b64 exec, 0
1742; SI-NEXT:    exp null off, off, off, off done vm
1743; SI-NEXT:    s_endpgm
1744;
1745; GFX10-WAVE64-LABEL: complex_loop:
1746; GFX10-WAVE64:       ; %bb.0: ; %.entry
1747; GFX10-WAVE64-NEXT:    s_cmp_lt_i32 s0, 1
1748; GFX10-WAVE64-NEXT:    s_cbranch_scc1 .LBB15_7
1749; GFX10-WAVE64-NEXT:  ; %bb.1: ; %.lr.ph
1750; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1751; GFX10-WAVE64-NEXT:    s_mov_b32 s6, 0
1752; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], 0
1753; GFX10-WAVE64-NEXT:    s_branch .LBB15_3
1754; GFX10-WAVE64-NEXT:  .LBB15_2: ; %latch
1755; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1756; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1757; GFX10-WAVE64-NEXT:    s_add_i32 s6, s6, 1
1758; GFX10-WAVE64-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1759; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, s6
1760; GFX10-WAVE64-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1761; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1762; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_6
1763; GFX10-WAVE64-NEXT:  .LBB15_3: ; %hdr
1764; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
1765; GFX10-WAVE64-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1766; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1767; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1768; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_2
1769; GFX10-WAVE64-NEXT:  ; %bb.4: ; %kill
1770; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1771; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1772; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB15_8
1773; GFX10-WAVE64-NEXT:  ; %bb.5: ; %kill
1774; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1775; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1776; GFX10-WAVE64-NEXT:    s_branch .LBB15_2
1777; GFX10-WAVE64-NEXT:  .LBB15_6: ; %Flow
1778; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1779; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1780; GFX10-WAVE64-NEXT:    s_endpgm
1781; GFX10-WAVE64-NEXT:  .LBB15_7:
1782; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, -1
1783; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1784; GFX10-WAVE64-NEXT:    s_endpgm
1785; GFX10-WAVE64-NEXT:  .LBB15_8:
1786; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1787; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1788; GFX10-WAVE64-NEXT:    s_endpgm
1789;
1790; GFX10-WAVE32-LABEL: complex_loop:
1791; GFX10-WAVE32:       ; %bb.0: ; %.entry
1792; GFX10-WAVE32-NEXT:    s_cmp_lt_i32 s0, 1
1793; GFX10-WAVE32-NEXT:    s_cbranch_scc1 .LBB15_7
1794; GFX10-WAVE32-NEXT:  ; %bb.1: ; %.lr.ph
1795; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
1796; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
1797; GFX10-WAVE32-NEXT:    s_mov_b32 s2, 0
1798; GFX10-WAVE32-NEXT:    s_branch .LBB15_3
1799; GFX10-WAVE32-NEXT:  .LBB15_2: ; %latch
1800; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1801; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s3
1802; GFX10-WAVE32-NEXT:    s_add_i32 s2, s2, 1
1803; GFX10-WAVE32-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s2, v1
1804; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, s2
1805; GFX10-WAVE32-NEXT:    s_or_b32 s0, vcc_lo, s0
1806; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
1807; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_6
1808; GFX10-WAVE32-NEXT:  .LBB15_3: ; %hdr
1809; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1810; GFX10-WAVE32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, s2, v0
1811; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s3, vcc_lo
1812; GFX10-WAVE32-NEXT:    s_xor_b32 s3, exec_lo, s3
1813; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_2
1814; GFX10-WAVE32-NEXT:  ; %bb.4: ; %kill
1815; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1816; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, exec_lo
1817; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB15_8
1818; GFX10-WAVE32-NEXT:  ; %bb.5: ; %kill
1819; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1820; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1821; GFX10-WAVE32-NEXT:    s_branch .LBB15_2
1822; GFX10-WAVE32-NEXT:  .LBB15_6: ; %Flow
1823; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1824; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1825; GFX10-WAVE32-NEXT:    s_endpgm
1826; GFX10-WAVE32-NEXT:  .LBB15_7:
1827; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, -1
1828; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1829; GFX10-WAVE32-NEXT:    s_endpgm
1830; GFX10-WAVE32-NEXT:  .LBB15_8:
1831; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1832; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1833; GFX10-WAVE32-NEXT:    s_endpgm
1834;
1835; GFX11-LABEL: complex_loop:
1836; GFX11:       ; %bb.0: ; %.entry
1837; GFX11-NEXT:    s_cmp_lt_i32 s0, 1
1838; GFX11-NEXT:    s_cbranch_scc1 .LBB15_7
1839; GFX11-NEXT:  ; %bb.1: ; %.lr.ph
1840; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1841; GFX11-NEXT:    s_mov_b32 s6, 0
1842; GFX11-NEXT:    s_mov_b64 s[0:1], 0
1843; GFX11-NEXT:    s_branch .LBB15_3
1844; GFX11-NEXT:  .LBB15_2: ; %latch
1845; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1846; GFX11-NEXT:    s_or_b64 exec, exec, s[4:5]
1847; GFX11-NEXT:    s_add_i32 s6, s6, 1
1848; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1849; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1850; GFX11-NEXT:    v_mov_b32_e32 v2, s6
1851; GFX11-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1852; GFX11-NEXT:    s_and_not1_b64 exec, exec, s[0:1]
1853; GFX11-NEXT:    s_cbranch_execz .LBB15_6
1854; GFX11-NEXT:  .LBB15_3: ; %hdr
1855; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1856; GFX11-NEXT:    s_mov_b64 s[4:5], exec
1857; GFX11-NEXT:    v_cmpx_gt_u32_e64 s6, v0
1858; GFX11-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1859; GFX11-NEXT:    s_cbranch_execz .LBB15_2
1860; GFX11-NEXT:  ; %bb.4: ; %kill
1861; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1862; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], exec
1863; GFX11-NEXT:    s_cbranch_scc0 .LBB15_8
1864; GFX11-NEXT:  ; %bb.5: ; %kill
1865; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1866; GFX11-NEXT:    s_mov_b64 exec, 0
1867; GFX11-NEXT:    s_branch .LBB15_2
1868; GFX11-NEXT:  .LBB15_6: ; %Flow
1869; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1870; GFX11-NEXT:    exp mrt0 v2, v2, v0, v0 done
1871; GFX11-NEXT:    s_endpgm
1872; GFX11-NEXT:  .LBB15_7:
1873; GFX11-NEXT:    v_mov_b32_e32 v2, -1
1874; GFX11-NEXT:    exp mrt0 v2, v2, v0, v0 done
1875; GFX11-NEXT:    s_endpgm
1876; GFX11-NEXT:  .LBB15_8:
1877; GFX11-NEXT:    s_mov_b64 exec, 0
1878; GFX11-NEXT:    exp mrt0 off, off, off, off done
1879; GFX11-NEXT:    s_endpgm
1880.entry:
  ; %cmpa is an inreg argument; the checks show this guard lowered to a scalar
  ; compare and branch (s_cmp_lt_i32 / s_cbranch_scc1).
1881  %flaga = icmp sgt i32 %cmpa, 0
1882  br i1 %flaga, label %.lr.ph, label %._crit_edge
1883
1884.lr.ph:
1885  br label %hdr
1886
1887hdr:
1888  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
  ; %cmpb is a non-inreg argument (v0 in the checks), so this branch is
  ; lowered with exec masking rather than a scalar branch.
1889  %flagb = icmp ugt i32 %ctr, %cmpb
1890  br i1 %flagb, label %kill, label %latch
1891
1892kill:
  ; A constant-false condition: every lane that reaches this block is killed.
1893  call void @llvm.amdgcn.kill(i1 false)
1894  br label %latch
1895
1896latch:
1897  %ctr.next = add nuw nsw i32 %ctr, 1
1898  %flagc = icmp slt i32 %ctr.next, %cmpc
1899  br i1 %flagc, label %hdr, label %._crit_edge
1900
1901._crit_edge:
  ; -1 when the loop was skipped, otherwise the counter value at loop exit.
1902  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
1903  %out = bitcast i32 %tmp to float
  ; The first two channels carry %out; the last two are undef (the checks show
  ; v0 being used for them).
1904  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %out, float %out, float undef, float undef, i1 true, i1 true)
1905  ret void
1906}
1907
; skip_mode_switch: a function without the amdgpu_ps calling convention that
; executes llvm.amdgcn.s.setreg only when %arg == 0.
;
; The check lines below expect, in every run configuration, an
; s_cbranch_execz around the block that holds s_setreg_imm32_b32, and a return
; through s_setpc_b64 s[30:31].
; NOTE(review): presumably the branch has to be kept because s_setreg is a
; scalar instruction whose effect does not depend on exec -- confirm against
; the branch-skipping logic in the AMDGPU backend.
1908define void @skip_mode_switch(i32 %arg) {
1909; WAVE64-LABEL: skip_mode_switch:
1910; WAVE64:       ; %bb.0: ; %entry
1911; WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1912; WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1913; WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1914; WAVE64-NEXT:    s_cbranch_execz .LBB16_2
1915; WAVE64-NEXT:  ; %bb.1: ; %bb.0
1916; WAVE64-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1917; WAVE64-NEXT:  .LBB16_2: ; %bb.1
1918; WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1919; WAVE64-NEXT:    s_setpc_b64 s[30:31]
1920;
1921; GFX10-WAVE32-LABEL: skip_mode_switch:
1922; GFX10-WAVE32:       ; %bb.0: ; %entry
1923; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1925; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s4, vcc_lo
1926; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB16_2
1927; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb.0
1928; GFX10-WAVE32-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1929; GFX10-WAVE32-NEXT:  .LBB16_2: ; %bb.1
1930; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s4
1931; GFX10-WAVE32-NEXT:    s_setpc_b64 s[30:31]
1932;
1933; GFX11-LABEL: skip_mode_switch:
1934; GFX11:       ; %bb.0: ; %entry
1935; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1936; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1937; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
1938; GFX11-NEXT:    s_cbranch_execz .LBB16_2
1939; GFX11-NEXT:  ; %bb.1: ; %bb.0
1940; GFX11-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1941; GFX11-NEXT:  .LBB16_2: ; %bb.1
1942; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1943; GFX11-NEXT:    s_setpc_b64 s[30:31]
1944entry:
1945  %cmp = icmp eq i32 %arg, 0
1946  br i1 %cmp, label %bb.0, label %bb.1
1947
1948bb.0:
  ; The immediate 2049 is what the checks print as hwreg(HW_REG_MODE, 0, 2);
  ; 3 is the value written to that field.
1949  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
1950  br label %bb.1
1951
1952bb.1:
1953  ret void
1954}
1955
; Declarations of the intrinsics called by the test functions in this file.
1956declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
1957declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
1958declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1959declare void @llvm.amdgcn.kill(i1) #0
1960
1961declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
1962
; Attribute groups. In the declarations above, group 0 is attached to the kill
; intrinsic, group 1 to both image sample intrinsics and group 3 to the export
; intrinsic.
1963attributes #0 = { nounwind }
1964attributes #1 = { nounwind readonly }
1965attributes #2 = { nounwind readnone speculatable }
1966attributes #3 = { inaccessiblememonly nounwind writeonly }
1967