; Runs simple-loop-unswitch<nontrivial> for an amdgcn triple using three
; pipeline spellings; all three are verified against the same FileCheck
; expectations in this file:
;   1. nested in loop(...), followed by verify<loops>
;   2. nested in loop-mssa(...), followed by verify<loops>
;   3. named on its own, with -verify-memoryssa on the command line
; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s
4
; External callees with no body in this file. Each arm of the test loops calls
; a different one (@a in loop_a, @b in loop_b_a, @c in loop_b_b), which makes
; the arms distinguishable in the expected output.
declare i32 @a()
declare i32 @b()
declare i32 @c()
8
; Non-trivial loop unswitching where there are two distinct branch
; conditions (%cond1 and %cond2, both function arguments) that could be
; unswitched out of the loop. The conditions are divergent, so the loop
; must not be unswitched: the expectations below match the input IR
; unchanged.
; NOTE(review): presumably the arguments are treated as divergent because
; this is an ordinary (non-kernel) function on amdgcn - confirm.
define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1(
entry:
  br label %loop_begin
; The entry block must still jump straight into the loop (no branch on
; %cond1 hoisted in front of it).
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %loop_begin

; First candidate: branch on %cond1 inside the loop.
loop_begin:
  br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b

loop_a:
  %unused.a = call i32 @a()
  br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch

; Second candidate: branch on %cond2, only reached when %cond1 is false.
loop_b:
  br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b

loop_b_a:
  %unused.b = call i32 @b()
  br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch

loop_b_b:
  %unused.c = call i32 @c()
  br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch

; The back-edge condition is reloaded from %ptr on every iteration; the loop
; repeats while the loaded value is true.
latch:
  %v = load i1, ptr %ptr
  br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
62
; Same loop as @test1, but declared as an amdgpu_kernel. The two branch
; conditions (%cond1 and %cond2) are known to be uniform here, so the loop
; should be unswitchable. However, unswitch currently does not make use of
; UniformityAnalysis, so for now the expectations below still match the
; input IR unchanged.
define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1_uniform(
entry:
  br label %loop_begin
; The entry block is expected to stay a plain jump into the loop.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %loop_begin

; First candidate: branch on %cond1 inside the loop.
loop_begin:
  br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b

loop_a:
  %unused.a = call i32 @a()
  br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch

; Second candidate: branch on %cond2, only reached when %cond1 is false.
loop_b:
  br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b

loop_b_a:
  %unused.b = call i32 @b()
  br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch

loop_b_b:
  %unused.c = call i32 @c()
  br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch

; The back-edge condition is reloaded from %ptr on every iteration; the loop
; repeats while the loaded value is true.
latch:
  %v = load i1, ptr %ptr
  br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
117
; Non-trivial loop unswitching where there are two distinct branch
; conditions (%cond1 and %cond2) to unswitch within the loop. There is no
; divergence because the function carries attribute group #0, under which
; it is assumed it can only execute with a workgroup of size 1. Both
; conditions are therefore expected to be unswitched, giving three
; specialized loops (one per call target).
;
; Layout note: the input blocks and the expectations are interleaved. The
; expectation groups describe the transformed output in output order; they do
; not necessarily describe the input block they happen to follow.
define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
; CHECK-LABEL: @test1_single_lane_execution(
entry:
  br label %loop_begin
; In the output, the branch on %cond1 has moved out of the loop into the
; entry block.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split

loop_begin:
  br i1 %cond1, label %loop_a, label %loop_b

loop_a:
  call i32 @a()
  br label %latch
; The 'loop_a' unswitched loop: taken when %cond1 is true. Its blocks carry
; the '.us' suffix and contain only unconditional branches besides the latch.
;
; CHECK:       entry.split.us:
; CHECK-NEXT:    br label %loop_begin.us
;
; CHECK:       loop_begin.us:
; CHECK-NEXT:    br label %loop_a.us
;
; CHECK:       loop_a.us:
; CHECK-NEXT:    call i32 @a()
; CHECK-NEXT:    br label %latch.us
;
; CHECK:       latch.us:
; CHECK-NEXT:    %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK:       loop_exit.split.us:
; CHECK-NEXT:    br label %loop_exit

loop_b:
  br i1 %cond2, label %loop_b_a, label %loop_b_b
; The second unswitched condition: reached when %cond1 is false, it branches
; on %cond2 before entering either remaining loop.
;
; CHECK:       entry.split:
; CHECK-NEXT:    br i1 %cond2, label %entry.split.split.us, label %entry.split.split

loop_b_a:
  call i32 @b()
  br label %latch
; The 'loop_b_a' unswitched loop: taken when %cond1 is false and %cond2 is
; true.
;
; CHECK:       entry.split.split.us:
; CHECK-NEXT:    br label %loop_begin.us1
;
; CHECK:       loop_begin.us1:
; CHECK-NEXT:    br label %loop_b.us
;
; CHECK:       loop_b.us:
; CHECK-NEXT:    br label %loop_b_a.us
;
; CHECK:       loop_b_a.us:
; CHECK-NEXT:    call i32 @b()
; CHECK-NEXT:    br label %latch.us2
;
; CHECK:       latch.us2:
; CHECK-NEXT:    %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
;
; CHECK:       loop_exit.split.split.us:
; CHECK-NEXT:    br label %loop_exit.split

loop_b_b:
  call i32 @c()
  br label %latch
; The 'loop_b_b' unswitched loop: taken when both conditions are false. This
; one keeps the original block names, with both in-loop conditional branches
; replaced by unconditional ones.
;
; CHECK:       entry.split.split:
; CHECK-NEXT:    br label %loop_begin
;
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %loop_b
;
; CHECK:       loop_b:
; CHECK-NEXT:    br label %loop_b_b
;
; CHECK:       loop_b_b:
; CHECK-NEXT:    call i32 @c()
; CHECK-NEXT:    br label %latch
;
; CHECK:       latch:
; CHECK-NEXT:    %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
;
; CHECK:       loop_exit.split.split:
; CHECK-NEXT:    br label %loop_exit.split

; The back-edge condition is reloaded from %ptr on every iteration; the loop
; repeats while the loaded value is true.
latch:
  %v = load i1, ptr %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
; All three specialized loops funnel back to the single return: the two
; %cond2 variants meet in loop_exit.split, which then joins the 'loop_a'
; variant in loop_exit.
; CHECK:       loop_exit.split:
; CHECK-NEXT:    br label %loop_exit
;
; CHECK:       loop_exit:
; CHECK-NEXT:    ret
}
222
; Attribute group attached to @test1_single_lane_execution.
; NOTE(review): "1,1" is presumably the minimum,maximum flat work-group size,
; i.e. exactly one work-item - confirm against the AMDGPU backend docs.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }
224