; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
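;
; A rough sketch of the pattern the OPT checks below look for, using the
; wave-64 (i64 mask) intrinsic variants that appear in this file:
;
;   bb1:                                      ; loop header
;     %broken = phi i64 [ 0, %bb ], [ %mask, %Flow ]
;     ...
;   Flow:
;     %mask = call i64 @llvm.amdgcn.if.break.i64(i1 %break.cond, i64 %broken)
;     %exit = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
;     br i1 %exit, label %bb9, label %bb1
;   bb9:
;     call void @llvm.amdgcn.end.cf.i64(i64 %mask)
;
; Roughly: if.break ORs the lanes whose break condition is set into the
; accumulated mask, loop clears those lanes from exec and exits once no
; active lanes remain, and end.cf restores exec at the join point.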

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc0 .LBB0_3
; GCN-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:    s_branch .LBB0_4
; GCN-NEXT:  .LBB0_3: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB0_4: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB0_1
; GCN-NEXT:  ; %bb.5: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
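; The compare against inttoptr (i32 4 to ptr addrspace(3)) below appears to
; be a stand-in: a constantexpr compare of @lds directly against null would
; be folded to a constant before the structurizer ever saw it (globals are
; assumed non-null), leaving no constantexpr phi input to test.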
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    [[CMP2:%.*]] = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ [[CMP2]], [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  %cmp2 = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ %cmp2, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Swap the order of branches in the flow block so that a true phi value
; means continue.
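; With the successors swapped, a true phi value now means "take the
; backedge", so the annotator must negate the condition before feeding it
; to if.break; the OPT checks show this as an xor of the phi with true,
; and the GCN output gains a matching s_xor_b64 with -1 in the flow block.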

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }