xref: /llvm-project/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll (revision ae77aceba5ad6ee575d3d79eb0259624322b19f4)
1; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
2
3;
4;                         Entry
5;                           |
6;                           v
7;                  -------->H---------
8;                  |        |        |
9;                  |        v        |
10;                  |    --->T----    |
11;                  |    |       |    |
12;                  |    |       V    |
13;                  S<---R       P <---
14;                  ^    ^       |
15;                  |    |  Div  |
16;                  |    --- Q <--
17;                  |        |
18;                  |        v
19;                  -------- U
20;                           |
21;                           v
22;                          Exit
23;
24; The divergent branch is at Q that exits an irreducible cycle with
25; entries T and P nested inside a reducible cycle with header H. R is
26; assigned label R, which reaches P. S is a join node with label S. If
27; this is propagated to P via H, then P is incorrectly recognized as a
28; join, making the inner cycle divergent. P is always executed
29; convergently -- either by threads that reconverged at header H, or
30; by threads that are still executing the inner cycle. Thus, any PHI
31; at P should not be marked divergent.
32
33define amdgpu_kernel void @nested_irreducible(i32 %a, i32 %b, i32 %c) {
34; CHECK=LABEL: UniformityInfo for function 'nested_irreducible':
35; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
36; CHECK: CYCLES WITH DIVERGENT EXIT:
37; CHECK-DAG:   depth=2: entries(P T) R Q
38; CHECK-DAG:   depth=1: entries(H) S P T R Q U
39entry:
40  %cond.uni = icmp slt i32 %a, 0
41  %tid = call i32 @llvm.amdgcn.workitem.id.x()
42  %cond.div = icmp slt i32 %tid, 0
43  br label %H
44
45H:
46 br i1 %cond.uni, label %T, label %P
47
48P:
49; CHECK-LABEL: BLOCK P
50; CHECK-NOT:   DIVERGENT:   %pp.phi =
51; CHECK-NOT: DIVERGENT:      %pp =
52  %pp.phi  = phi i32 [ %a, %H], [ %b, %T ]
53  %pp = add i32 %b, 1
54  br label %Q
55
56Q:
57; CHECK-LABEL: BLOCK Q
58; CHECK-NOT: DIVERGENT:   %qq =
59; CHECK-NOT:   DIVERGENT:   %qq.uni =
60  %qq = add i32 %b, 1
61  %qq.uni = add i32 %pp.phi, 1
62  br i1 %cond.div, label %R, label %U
63
64R:
65  br i1 %cond.uni, label %S, label %T
66
67T:
68; CHECK-LABEL: BLOCK T
69; CHECK-NOT:   DIVERGENT:   %tt.phi =
70; CHECK-NOT: DIVERGENT:     %tt =
71  %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ]
72  %tt = add i32 %b, 1
73  br label %P
74
75S:
76; CHECK-LABEL: BLOCK S
77; CHECK:   DIVERGENT:   %ss.phi =
78; CHECK-NOT: DIVERGENT:     %ss =
79  %ss.phi = phi i32 [ %qq.uni, %U ], [ %a, %R ]
80  %ss = add i32 %b, 1
81  br label %H
82
83U:
84  br i1 %cond.uni, label %S, label %exit
85
86exit:
87; CHECK: DIVERGENT:     %ee.div =
88; CHECK-NOT: DIVERGENT:     %ee =
89  %ee.div =  add i32 %qq.uni, 1
90  %ee = add i32 %b, 1
91  ret void
92}
93
94;
95;                         Entry
96;                           |
97;                           v
98;               -->-------->H---------
99;               |  ^        |        |
100;               |  |        |        |
101;               |  |        |        |
102;               |  |        |        |
103;               |  |        v        V
104;               |  R<-------T-->U--->P
105;               |          Div       |
106;               |                    |
107;               ----------- Q <-------
108;                           |
109;                           v
110;                          Exit
111;
112; This is a reducible cycle with a divergent branch at T. Disjoint
113; paths eventually join at the header H, which is assigned label H.
114; Node P is assigned label U. If the header label were propagated to
115; P, it will be incorrectly recgonized as a join. P is always executed
116; convergently -- either by threads that reconverged at header H, or
117; by threads that diverged at T (and eventually reconverged at H).
118; Thus, any PHI at P should not be marked divergent.
119
120define amdgpu_kernel void @header_label_1(i32 %a, i32 %b, i32 %c) {
121; CHECK=LABEL: UniformityInfo for function 'header_label_1':
122; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
123; CHECK: CYCLES WITH DIVERGENT EXIT:
124; CHECK:  depth=1: entries(H) Q P U T R
125entry:
126  %cond.uni = icmp slt i32 %a, 0
127  %tid = call i32 @llvm.amdgcn.workitem.id.x()
128  %cond.div = icmp slt i32 %tid, 0
129  br label %H
130
131H:
132  br i1 %cond.uni, label %T, label %P
133
134P:
135; CHECK-LABEL: BLOCK P
136; CHECK-NOT:   DIVERGENT:   %pp.phi =
137; CHECK-NOT: DIVERGENT:      %pp =
138  %pp.phi  = phi i32 [ %a, %H], [ %b, %U ]
139  %pp = add i32 %b, 1
140  br label %Q
141
142Q:
143; CHECK-LABEL: BLOCK Q
144; CHECK-NOT: DIVERGENT:   %qq =
145; CHECK-NOT:   DIVERGENT:   %qq.uni =
146  %qq = add i32 %b, 1
147  %qq.uni = add i32 %pp.phi, 1
148  br i1 %cond.uni, label %exit, label %H
149
150R:
151  br label %H
152
153T:
154  br i1 %cond.div, label %R, label %U
155
156U:
157  br label %P
158
159exit:
160; CHECK-LABEL: BLOCK exit
161; CHECK: DIVERGENT:     %ee.div =
162; CHECK-NOT: DIVERGENT:     %ee =
163  %ee.div =  add i32 %qq.uni, 1
164  %ee = add i32 %b, 1
165  ret void
166}
167
168;        entry
169;            |
170;        --> H1
171;        |   | \
172;        |   | H2(div)
173;        |   \ / \
174;        |    B   C
175;        ^     \ /
176;        \------D
177;               |
178;               X
179;
180; This is a reducible cycle with a divergent branch at H2. Disjoint
181; paths eventually join at the header D, which is assigned label D.
182; Node B is assigned label B. If the header label D were propagated to
183; B, it will be incorrectly recgonized as a join. B is always executed
184; convergently -- either by threads that reconverged at header H1, or
185; by threads that diverge at H2 (and eventually reconverged at H1).
186; Thus, any PHI at B should not be marked divergent.
187
188define amdgpu_kernel void @header_label_2(i32 %a, i32 %b, i32 %c) {
189; CHECK-LABEL: UniformityInfo for function 'header_label_2':
190; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
191; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
192entry:
193  %cond.uni = icmp slt i32 %a, 0
194  %tid = call i32 @llvm.amdgcn.workitem.id.x()
195  %cond.div = icmp slt i32 %tid, 0
196  br label %H1
197
198H1:
199  br i1 %cond.uni, label %B, label %H2
200
201H2:
202  br i1 %cond.div, label %B, label %C
203
204B:
205; CHECK-LABEL: BLOCK B
206; CHECK-NOT: DIVERGENT:     %bb.phi =
207  %bb.phi = phi i32 [%a, %H1], [%b, %H2]
208  br label %D
209
210C:
211  br label %D
212
213D:
214; CHECK-LABEL: BLOCK D
215; CHECK: DIVERGENT:     %dd.phi =
216  %dd.phi = phi i32 [%a, %B], [%b, %C]
217  br i1 %cond.uni, label %exit, label %H1
218
219exit:
220  %ee.1 = add i32 %dd.phi, 1
221  %ee.2 = add i32 %b, 1
222  ret void
223}
224
225declare i32 @llvm.amdgcn.workitem.id.x() #0
226