1; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
2
3; These tests have identical control flow graphs, differing only in the
4; successor order of some branches, which affects cycle-info. That is a
5; minor functional difference, but it is not relevant to the tests.
6
7;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8;;
9;; The inner cycle has a header (P) that dominates the join, hence
10;; both cycles are reported as converged.
11;;
12;; CHECK-LABEL: UniformityInfo for function 'headers_b_p':
13;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
14;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
15
; Variant "b_p": outer cycle intended to be headed by B, inner cycle by P.
;
; Blocks A, B, C, D and the inner blocks form the outer cycle (D branches
; back to A); `entry` can reach both A and B, so that cycle has two entries.
; Blocks P, Q, R, S, T form the inner cycle (S -> T -> P); C can reach both
; T and P, so the inner cycle has two entries as well.
;
; %a.div, %pp, %s.phi, %tt.phi, %tt, %ee and the argument %c have no uses in
; this function.
16define amdgpu_kernel void @headers_b_p(i32 %a, i32 %b, i32 %c) {
17entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is
; computed from the work-item id.
18 %cond.uni = icmp slt i32 %a, 0
19 %tid = call i32 @llvm.amdgcn.workitem.id.x()
20 %cond.div = icmp slt i32 %tid, 0
21 %a.div = add i32 %tid, %a
; Successor order (A, B) matches @headers_b_t, whose CHECK line reports
; entries(B A). The opposite order (B, A) is what @headers_a_t uses, and its
; CHECK line reports entries(A B); that order would make this test a
; duplicate of the "header A" variant.
; NOTE(review): header selection appears to follow the successor listed
; last -- confirm against the cycle-info DFS.
22 br i1 %cond.uni, label %A, label %B
23
24A:
25 br label %B
26
27B:
28 br i1 %cond.uni, label %C, label %D
29
30C:
; Both successors are blocks of the inner cycle. This order (T, P) is the
; opposite of the one used in the *_t variants, which report entries(T P).
31 br i1 %cond.uni, label %T, label %P
32
33P:
34  %pp.phi  = phi i32 [ %a, %C], [ %b, %T ]
35  %pp = add i32 %b, 1
36  br i1 %cond.uni, label %R, label %Q
37
38Q:
39  %qq = add i32 %b, 1
; The only branch in the function that is conditional on %cond.div.
40  br i1 %cond.div, label %S, label %R
41
42R:
43  %rr = add i32 %b, 1
44  br label %S
45
46S:
; Both sides of Q's branch meet here (Q -> S and Q -> R -> S); presumably
; this is "the join" the header comment refers to.
47  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
48  %ss = add i32 %pp.phi, 1
; Leaves the inner cycle towards D, or continues it through T.
49  br i1 %cond.uni, label %D, label %T
50
51D:
; Leaves the outer cycle, or closes it by branching back to A.
52  br i1 %cond.uni, label %exit, label %A
53
54T:
55  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
56  %tt = add i32 %b, 1
57  br label %P
58
59exit:
60  %ee = add i32 %b, 1
61  ret void
62}
63
64;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
65;;
66;; Same as previous, but the outer cycle has a different header (A).
67;; The inner cycle has a header (P) that dominates the join, hence
68;; both cycles are reported as converged.
69;;
70;; CHECK-LABEL: UniformityInfo for function 'headers_a_p':
71;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
72;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
73
; Variant "a_p": per the header comment, the outer cycle is headed by A and
; the inner cycle by P. The CHECK-NOT lines above require that neither cycle
; heading is printed for this function.
;
; Blocks A, B, C, D and the inner blocks form the outer cycle (D branches
; back to A); `entry` can reach both A and B, so that cycle has two entries.
; Blocks P, Q, R, S, T form the inner cycle (S -> T -> P); C can reach both
; T and P, so the inner cycle has two entries as well.
;
; %a.div, %pp, %s.phi, %tt.phi, %tt, %ee and the argument %c have no uses in
; this function.
74define amdgpu_kernel void @headers_a_p(i32 %a, i32 %b, i32 %c) {
75entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is
; computed from the work-item id.
76 %cond.uni = icmp slt i32 %a, 0
77 %tid = call i32 @llvm.amdgcn.workitem.id.x()
78 %cond.div = icmp slt i32 %tid, 0
79 %a.div = add i32 %tid, %a
; Same successor order (B, A) as @headers_a_t, whose CHECK line reports
; entries(A B).
; NOTE(review): header selection appears to follow the successor listed
; last -- confirm against the cycle-info DFS.
80 br i1 %cond.uni, label %B, label %A
81
82A:
83 br label %B
84
85B:
86 br i1 %cond.uni, label %C, label %D
87
88C:
; Both successors are blocks of the inner cycle. This order (T, P) is the
; opposite of the one used in the *_t variants, which report entries(T P).
89 br i1 %cond.uni, label %T, label %P
90
91P:
92  %pp.phi  = phi i32 [ %a, %C], [ %b, %T ]
93  %pp = add i32 %b, 1
94  br i1 %cond.uni, label %R, label %Q
95
96Q:
97  %qq = add i32 %b, 1
; The only branch in the function that is conditional on %cond.div.
98  br i1 %cond.div, label %S, label %R
99
100R:
101  %rr = add i32 %b, 1
102  br label %S
103
104S:
; Both sides of Q's branch meet here (Q -> S and Q -> R -> S); presumably
; this is "the join" the header comment refers to.
105  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
106  %ss = add i32 %pp.phi, 1
; Leaves the inner cycle towards D, or continues it through T.
107  br i1 %cond.uni, label %D, label %T
108
109D:
; Leaves the outer cycle, or closes it by branching back to A.
110  br i1 %cond.uni, label %exit, label %A
111
112T:
113  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
114  %tt = add i32 %b, 1
115  br label %P
116
117exit:
118  %ee = add i32 %b, 1
119  ret void
120}
121
122;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123;;
124;; The inner cycle has a header (T) that does not dominate the join.
125;; The outer cycle has a header (B) that dominates the join. Hence
126;; only the inner cycle is reported as diverged.
127;;
128;; CHECK-LABEL: UniformityInfo for function 'headers_b_t':
129;; CHECK: CYCLES ASSSUMED DIVERGENT:
130;; CHECK:   depth=2: entries(T P) S Q R
131;; CHECK: CYCLES WITH DIVERGENT EXIT:
132;; CHECK:   depth=1: entries(B A) D T S Q P R C
133
; Variant "b_t": per the header comment and the CHECK lines above, the outer
; cycle is headed by B (entries(B A)) and the inner cycle by T
; (entries(T P)). The inner cycle is expected under "assumed divergent" and
; the outer one under "divergent exit".
;
; Blocks A, B, C, D and the inner blocks form the outer cycle (D branches
; back to A); `entry` can reach both A and B, so that cycle has two entries.
; Blocks P, Q, R, S, T form the inner cycle (S -> T -> P); C can reach both
; P and T, so the inner cycle has two entries as well.
;
; %a.div, %pp, %s.phi, %tt.phi, %tt, %ee and the argument %c have no uses in
; this function.
134define amdgpu_kernel void @headers_b_t(i32 %a, i32 %b, i32 %c) {
135entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is
; computed from the work-item id.
136 %cond.uni = icmp slt i32 %a, 0
137 %tid = call i32 @llvm.amdgcn.workitem.id.x()
138 %cond.div = icmp slt i32 %tid, 0
139 %a.div = add i32 %tid, %a
; Successor order (A, B); the CHECK line for this function lists B first in
; entries(B A).
; NOTE(review): header selection appears to follow the successor listed
; last -- confirm against the cycle-info DFS.
140 br i1 %cond.uni, label %A, label %B
141
142A:
143 br label %B
144
145B:
146 br i1 %cond.uni, label %C, label %D
147
148C:
; Both successors are blocks of the inner cycle. Successor order (P, T);
; the CHECK line lists T first in entries(T P).
149 br i1 %cond.uni, label %P, label %T
150
151P:
152  %pp.phi  = phi i32 [ %a, %C], [ %b, %T ]
153  %pp = add i32 %b, 1
154  br i1 %cond.uni, label %R, label %Q
155
156Q:
157  %qq = add i32 %b, 1
; The only branch in the function that is conditional on %cond.div.
158  br i1 %cond.div, label %S, label %R
159
160R:
161  %rr = add i32 %b, 1
162  br label %S
163
164S:
; Both sides of Q's branch meet here (Q -> S and Q -> R -> S); presumably
; this is "the join" the header comment refers to. S can be reached from C
; through P without passing through T.
165  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
166  %ss = add i32 %pp.phi, 1
; Leaves the inner cycle towards D, or continues it through T.
167  br i1 %cond.uni, label %D, label %T
168
169D:
; Leaves the outer cycle, or closes it by branching back to A.
170  br i1 %cond.uni, label %exit, label %A
171
172T:
173  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
174  %tt = add i32 %b, 1
175  br label %P
176
177exit:
178  %ee = add i32 %b, 1
179  ret void
180}
181
182;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
183;;
184;; The cycles have headers (A and T) that do not dominate the join.
185;; Hence the outermost cycle is reported as diverged.
186;;
187;; CHECK-LABEL: UniformityInfo for function 'headers_a_t':
188;; CHECK: CYCLES ASSSUMED DIVERGENT:
189;; CHECK:   depth=1: entries(A B) D T S Q P R C
190;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
191
; Variant "a_t": per the header comment, the cycles are headed by A (outer)
; and T (inner). The CHECK lines above expect only the depth=1 cycle, printed
; as entries(A B), under "assumed divergent", and no "divergent exit" heading.
;
; Blocks A, B, C, D and the inner blocks form the outer cycle (D branches
; back to A); `entry` can reach both A and B, so that cycle has two entries.
; Blocks P, Q, R, S, T form the inner cycle (S -> T -> P); C can reach both
; P and T, so the inner cycle has two entries as well.
;
; %a.div, %pp, %s.phi, %tt.phi, %tt, %ee and the argument %c have no uses in
; this function.
192define amdgpu_kernel void @headers_a_t(i32 %a, i32 %b, i32 %c) {
193entry:
; %cond.uni is computed from the kernel argument %a only; %cond.div is
; computed from the work-item id.
194 %cond.uni = icmp slt i32 %a, 0
195 %tid = call i32 @llvm.amdgcn.workitem.id.x()
196 %cond.div = icmp slt i32 %tid, 0
197 %a.div = add i32 %tid, %a
; Successor order (B, A); the CHECK line for this function lists A first in
; entries(A B).
; NOTE(review): header selection appears to follow the successor listed
; last -- confirm against the cycle-info DFS.
198 br i1 %cond.uni, label %B, label %A
199
200A:
201 br label %B
202
203B:
204 br i1 %cond.uni, label %C, label %D
205
206C:
; Both successors are blocks of the inner cycle. Same successor order (P, T)
; as @headers_b_t, whose CHECK line reports entries(T P).
207 br i1 %cond.uni, label %P, label %T
208
209P:
210  %pp.phi  = phi i32 [ %a, %C], [ %b, %T ]
211  %pp = add i32 %b, 1
212  br i1 %cond.uni, label %R, label %Q
213
214Q:
215  %qq = add i32 %b, 1
; The only branch in the function that is conditional on %cond.div.
216  br i1 %cond.div, label %S, label %R
217
218R:
219  %rr = add i32 %b, 1
220  br label %S
221
222S:
; Both sides of Q's branch meet here (Q -> S and Q -> R -> S); presumably
; this is "the join" the header comment refers to. S can be reached from
; `entry` through B, C and P without passing through A or T.
223  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
224  %ss = add i32 %pp.phi, 1
; Leaves the inner cycle towards D, or continues it through T.
225  br i1 %cond.uni, label %D, label %T
226
227D:
; Leaves the outer cycle, or closes it by branching back to A.
228  br i1 %cond.uni, label %exit, label %A
229
230T:
231  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
232  %tt = add i32 %b, 1
233  br label %P
234
235exit:
236  %ee = add i32 %b, 1
237  ret void
238}
239
; Intrinsic called by every kernel above to produce %tid, from which
; %cond.div is computed.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file -- confirm that an `attributes #0 = { ... }` line follows.
240declare i32 @llvm.amdgcn.workitem.id.x() #0
241