xref: /llvm-project/llvm/test/CodeGen/AMDGPU/while-break.ll (revision 54d31bde324523d946fd87f5c5d5e271826209d6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
3
; Loop with two exits to %end: an early break taken from %else and the regular
; exit taken from %latch. The float carried around the loop is the result.
4define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
5; GCN-LABEL: while_break:
6; GCN:       ; %bb.0: ; %entry
7; GCN-NEXT:    s_mov_b32 s1, -1
8; GCN-NEXT:    s_mov_b32 s0, 0
9; GCN-NEXT:    s_branch .LBB0_2
10; GCN-NEXT:  .LBB0_1: ; %Flow2
11; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
12; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
13; GCN-NEXT:    s_and_b32 s2, exec_lo, s3
14; GCN-NEXT:    s_or_b32 s0, s2, s0
15; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
16; GCN-NEXT:    s_cbranch_execz .LBB0_8
17; GCN-NEXT:  .LBB0_2: ; %header
18; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
19; GCN-NEXT:    s_add_i32 s1, s1, 1
20; GCN-NEXT:    s_mov_b32 s2, 0
21; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s1, v2
22; GCN-NEXT:    s_and_saveexec_b32 s3, vcc_lo
23; GCN-NEXT:    s_xor_b32 s3, exec_lo, s3
24; GCN-NEXT:  ; %bb.3: ; %else
25; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
26; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s1, v3
27; GCN-NEXT:    s_and_b32 s2, vcc_lo, exec_lo
28; GCN-NEXT:  ; %bb.4: ; %Flow
29; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
30; GCN-NEXT:    s_andn2_saveexec_b32 s3, s3
31; GCN-NEXT:  ; %bb.5: ; %if
32; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
33; GCN-NEXT:    v_add_f32_e32 v1, 1.0, v1
34; GCN-NEXT:    s_or_b32 s2, s2, exec_lo
35; GCN-NEXT:  ; %bb.6: ; %Flow1
36; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
37; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s3
38; GCN-NEXT:    s_mov_b32 s3, -1
39; GCN-NEXT:    s_and_saveexec_b32 s4, s2
40; GCN-NEXT:    s_cbranch_execz .LBB0_1
41; GCN-NEXT:  ; %bb.7: ; %latch
42; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
43; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s1, v0
44; GCN-NEXT:    s_orn2_b32 s3, vcc_lo, exec_lo
45; GCN-NEXT:    s_branch .LBB0_1
46; GCN-NEXT:  .LBB0_8: ; %end
47; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
48; GCN-NEXT:    v_mov_b32_e32 v0, v1
49; GCN-NEXT:    ; return to shader part epilog
50entry:
51  br label %header
52
; Loop header: %v.1 is the loop-carried float (initially %v) and %ind is the
; induction variable (initially 0). Lanes with %ind < %x go to %if, the rest
; go to %else.
53header:
54  %v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
55  %ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
56  %cc = icmp slt i32 %ind, %x
57  br i1 %cc, label %if, label %else
58
; %if: add 1.0 to the carried value, then always continue to %latch.
59if:
60  %v.if = fadd float %v.1, 1.0
61  br label %latch
62
; %else: the carried value is left unchanged. Continue to %latch while
; %ind < %y; otherwise break out of the loop straight to %end.
63else:
64  %cc2 = icmp slt i32 %ind, %y
65  br i1 %cc2, label %latch, label %end
66
; %latch: merge the value from %if / %else and bump the induction variable.
; The exit test uses the pre-increment %ind: the loop exits to %end when
; %ind < %z and otherwise branches back to %header.
67latch:
68  %v.2 = phi float [ %v.if, %if ], [ %v.1, %else ]
69  %ind.inc = add i32 %ind, 1
70  %cc3 = icmp slt i32 %ind, %z
71  br i1 %cc3, label %end, label %header
72
; %end: the result is %v.2 when leaving through %latch and %v.1 when breaking
; out of %else.
73end:
74  %r = phi float [ %v.2, %latch ], [ %v.1, %else ]
75  ret float %r
76}
77
78; Same blocks as while_break, but the successors of the %header branch are swapped (%else is the taken target, %if the fall-through), so the blocks are visited in a different DFS order.
79define amdgpu_ps float @while_break2(i32 %z, float %v, i32 %x, i32 %y) #0 {
80; GCN-LABEL: while_break2:
81; GCN:       ; %bb.0: ; %entry
82; GCN-NEXT:    s_mov_b32 s1, -1
83; GCN-NEXT:    s_mov_b32 s0, 0
84; GCN-NEXT:    s_branch .LBB1_2
85; GCN-NEXT:  .LBB1_1: ; %Flow2
86; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
87; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
88; GCN-NEXT:    s_and_b32 s2, exec_lo, s3
89; GCN-NEXT:    s_or_b32 s0, s2, s0
90; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
91; GCN-NEXT:    s_cbranch_execz .LBB1_8
92; GCN-NEXT:  .LBB1_2: ; %header
93; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
94; GCN-NEXT:    s_add_i32 s1, s1, 1
95; GCN-NEXT:    s_mov_b32 s2, 0
96; GCN-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s1, v2
97; GCN-NEXT:    s_and_saveexec_b32 s3, vcc_lo
98; GCN-NEXT:    s_xor_b32 s3, exec_lo, s3
99; GCN-NEXT:  ; %bb.3: ; %if
100; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
101; GCN-NEXT:    v_add_f32_e32 v1, 1.0, v1
102; GCN-NEXT:    s_mov_b32 s2, exec_lo
103; GCN-NEXT:  ; %bb.4: ; %Flow
104; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
105; GCN-NEXT:    s_andn2_saveexec_b32 s3, s3
106; GCN-NEXT:  ; %bb.5: ; %else
107; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
108; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s1, v3
109; GCN-NEXT:    s_andn2_b32 s2, s2, exec_lo
110; GCN-NEXT:    s_and_b32 s4, vcc_lo, exec_lo
111; GCN-NEXT:    s_or_b32 s2, s2, s4
112; GCN-NEXT:  ; %bb.6: ; %Flow1
113; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
114; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s3
115; GCN-NEXT:    s_mov_b32 s3, -1
116; GCN-NEXT:    s_and_saveexec_b32 s4, s2
117; GCN-NEXT:    s_cbranch_execz .LBB1_1
118; GCN-NEXT:  ; %bb.7: ; %latch
119; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
120; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s1, v0
121; GCN-NEXT:    s_orn2_b32 s3, vcc_lo, exec_lo
122; GCN-NEXT:    s_branch .LBB1_1
123; GCN-NEXT:  .LBB1_8: ; %end
124; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
125; GCN-NEXT:    v_mov_b32_e32 v0, v1
126; GCN-NEXT:    ; return to shader part epilog
127entry:
128  br label %header
129
; Loop header: %v.1 is the loop-carried float (initially %v) and %ind is the
; induction variable (initially 0). Here lanes with %ind < %x go to %else and
; the rest go to %if.
130header:
131  %v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
132  %ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
133  %cc = icmp slt i32 %ind, %x
134  br i1 %cc, label %else, label %if
135
; %if: add 1.0 to the carried value, then always continue to %latch.
136if:
137  %v.if = fadd float %v.1, 1.0
138  br label %latch
139
; %else: the carried value is left unchanged. Continue to %latch while
; %ind < %y; otherwise break out of the loop straight to %end.
140else:
141  %cc2 = icmp slt i32 %ind, %y
142  br i1 %cc2, label %latch, label %end
143
; %latch: merge the value from %if / %else and bump the induction variable.
; The exit test uses the pre-increment %ind: the loop exits to %end when
; %ind < %z and otherwise branches back to %header.
144latch:
145  %v.2 = phi float [ %v.if, %if ], [ %v.1, %else ]
146  %ind.inc = add i32 %ind, 1
147  %cc3 = icmp slt i32 %ind, %z
148  br i1 %cc3, label %end, label %header
149
; %end: the result is %v.2 when leaving through %latch and %v.1 when breaking
; out of %else.
150end:
151  %r = phi float [ %v.2, %latch ], [ %v.1, %else ]
152  ret float %r
153}
154
155; Two chains of phi nodes (%v.1/%v.2/%r and %v.copy/%v.copy.2/%r2) that both receive the same incoming value, %v.if, from the %if block.
156define amdgpu_ps < 2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i32 %y, i32 %z, ptr addrspace(1) %p) #0 {
157; GCN-LABEL: while_break_two_chains_of_phi:
158; GCN:       ; %bb.0: ; %entry
159; GCN-NEXT:    v_mov_b32_e32 v6, 0
160; GCN-NEXT:    s_mov_b32 s2, 0
161; GCN-NEXT:    s_mov_b32 s0, 0
162; GCN-NEXT:    s_branch .LBB2_2
163; GCN-NEXT:  .LBB2_1: ; %Flow1
164; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
165; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
166; GCN-NEXT:    s_and_b32 s1, exec_lo, s1
167; GCN-NEXT:    s_or_b32 s2, s1, s2
168; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s2
169; GCN-NEXT:    s_cbranch_execz .LBB2_6
170; GCN-NEXT:  .LBB2_2: ; %header
171; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
172; GCN-NEXT:    v_cmp_ge_i32_e64 s3, s0, v1
173; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v1
174; GCN-NEXT:    s_and_saveexec_b32 s4, vcc_lo
175; GCN-NEXT:    s_cbranch_execz .LBB2_4
176; GCN-NEXT:  ; %bb.3: ; %if
177; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
178; GCN-NEXT:    s_ashr_i32 s1, s0, 31
179; GCN-NEXT:    s_lshl_b64 s[6:7], s[0:1], 2
180; GCN-NEXT:    s_andn2_b32 s1, s3, exec_lo
181; GCN-NEXT:    v_add_co_u32 v6, vcc_lo, v4, s6
182; GCN-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, s7, v5, vcc_lo
183; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v2
184; GCN-NEXT:    global_load_dword v0, v[6:7], off
185; GCN-NEXT:    s_and_b32 s3, vcc_lo, exec_lo
186; GCN-NEXT:    s_or_b32 s3, s1, s3
187; GCN-NEXT:    s_waitcnt vmcnt(0)
188; GCN-NEXT:    v_add_f32_e32 v6, 1.0, v0
189; GCN-NEXT:    v_mov_b32_e32 v0, v6
190; GCN-NEXT:  .LBB2_4: ; %Flow
191; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
192; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
193; GCN-NEXT:    s_mov_b32 s1, -1
194; GCN-NEXT:    s_and_saveexec_b32 s4, s3
195; GCN-NEXT:    s_cbranch_execz .LBB2_1
196; GCN-NEXT:  ; %bb.5: ; %latch
197; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
198; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v3
199; GCN-NEXT:    s_add_i32 s0, s0, 1
200; GCN-NEXT:    s_orn2_b32 s1, vcc_lo, exec_lo
201; GCN-NEXT:    s_branch .LBB2_1
202; GCN-NEXT:  .LBB2_6: ; %end
203; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s2
204; GCN-NEXT:    v_mov_b32_e32 v1, v6
205; GCN-NEXT:    ; return to shader part epilog
206entry:
207  br label %header
208
; Loop header: two loop-carried floats, %v.1 (initially %v) and %v.copy
; (initially 0.0), plus the induction variable %ind (initially 0). Lanes with
; %ind < %x go to %if; the rest skip directly to %latch.
209header:
210  %v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
211  %v.copy = phi float [ 0.0, %entry ], [ %v.copy.2, %latch ]
212  %ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
213  %cc = icmp slt i32 %ind, %x
214  br i1 %cc, label %if, label %latch
215
; %if: load the float at index %ind of %p and add 1.0 to it; the result %v.if
; feeds both phi chains. Continue to %latch while %ind < %y; otherwise break
; out of the loop straight to %end.
216if:
217  %v.ptr = getelementptr float, ptr addrspace(1) %p, i32 %ind
218  %v.load = load float, ptr addrspace(1) %v.ptr
219  %v.if = fadd float %v.load, 1.0
220  %cc2 = icmp slt i32 %ind, %y
221  br i1 %cc2, label %latch, label %end
222
; %latch: each chain keeps its own carried value when coming from %header, but
; both take the same %v.if when coming from %if. The exit test uses the
; pre-increment %ind: the loop exits to %end when %ind < %z and otherwise
; branches back to %header.
223latch:
224  %v.2 = phi float [ %v.1, %header ], [ %v.if, %if ]
225  %v.copy.2 = phi float [ %v.copy, %header ], [ %v.if, %if ]
226  %ind.inc = add i32 %ind, 1
227  %cc3 = icmp slt i32 %ind, %z
228  br i1 %cc3, label %end, label %header
229
; %end: both results again take %v.if on the break edge from %if. They are
; packed into element 0 (%r) and element 1 (%r2) of the returned vector.
230end:
231  %r = phi float [ %v.2, %latch ], [ %v.if, %if ]
232  %r2 = phi float [ %v.copy.2, %latch ], [ %v.if, %if ]
233  %packed0 = insertelement < 2 x float > poison, float %r, i32 0
234  %packed1 = insertelement < 2 x float > %packed0, float %r2, i32 1
235  ret < 2 x float> %packed1
236}
237
238attributes #0 = { nounwind }
239