xref: /llvm-project/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll (revision ae77aceba5ad6ee575d3d79eb0259624322b19f4)
1; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
2
; Irreducible cycle {P, Q, T}: %entry branches on the uniform %cond.uni to
; either T or P, so the cycle has two entries. The only edge leaving the cycle
; is the branch in Q on %cond.div, which is derived from the workitem id.
;
; NOTE(review): the triple-S spelling "ASSSUMED" below presumably mirrors the
; text printed by the analysis; confirm against the printer before changing it.
;
; CHECK-LABEL: UniformityInfo for function 'basic':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK:   depth=1: entries(P T) Q
define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond.div = icmp slt i32 %tid, 0
  ; Uniform two-way entry into the cycle.
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi.1 = phi i32 [ %a, %entry ], [ %b, %T ]
  %pp.phi.2 = phi i32 [ %a, %entry ], [ %tt.phi, %T ]
  %pp = add i32 %b, 1
  br label %Q

Q:
  %qq = add i32 %b, 1
  %qq.div.1 = add i32 %pp.phi.2, 1
  %qq.div.2 = add i32 %pp.phi.2, 1
  ; The cycle exit is controlled by the workitem-id comparison.
  br i1 %cond.div, label %T, label %exit

T:
  %tt.phi = phi i32 [ %qq, %Q ], [ %a, %entry ]
  %tt = add i32 %b, 1
  br label %P

exit:
; Uses of the phis defined in P are expected to be reported divergent here;
; %ee.3 only uses the kernel argument %b and must not be.
; CHECK:   DIVERGENT:   %ee.1 =
; CHECK:   DIVERGENT:   %xx.2 =
; CHECK-NOT: DIVERGENT:     %ee.3 =
  %ee.1 = add i32 %pp.phi.1, 1
  %xx.2 = add i32 %pp.phi.2, 1
  %ee.3 = add i32 %b, 1
  ret void
}
40
; Same P/Q/T shape as above, but wrapped in a loop headed by H: %entry jumps
; unconditionally to H, and R can branch back to H. The expected cycle report
; lists H as the single entry. The edge out of the cycle is the branch in Q
; on %cond.div, which is derived from the workitem id.
;
; CHECK-LABEL: UniformityInfo for function 'outer_reducible':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK:   depth=1: entries(H) P T R Q
define amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond.div = icmp slt i32 %tid, 0
  br label %H

H:
  ; Branch on the argument-derived condition into either T or P.
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi.1 = phi i32 [ %a, %H ], [ %b, %T ]
  %pp.phi.2 = phi i32 [ %a, %H ], [ %tt.phi, %T ]
  %pp = add i32 %b, 1
  br label %Q

Q:
  %qq = add i32 %b, 1
  %qq.div.1 = add i32 %pp.phi.2, 1
  %qq.div.2 = add i32 %pp.phi.2, 1
  ; Leaving the cycle depends on the workitem-id comparison.
  br i1 %cond.div, label %R, label %exit

R:
  ; Either continue to T or go back to the header H.
  br i1 %cond.uni, label %T, label %H

T:
  %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ]
  %tt = add i32 %b, 1
  br label %P

exit:
; The two uses of phis from P are expected divergent; the use of the plain
; argument %b is not.
; CHECK:   DIVERGENT:   %ee.1 =
; CHECK:   DIVERGENT:   %xx.2 =
; CHECK-NOT: DIVERGENT:     %ee.3 =
  %ee.1 = add i32 %pp.phi.1, 1
  %xx.2 = add i32 %pp.phi.2, 1
  %ee.3 = add i32 %b, 1
  ret void
}
85
;      entry(div)
;      |   \
;      H -> B
;      ^   /|
;      \--C |
;          \|
;           X
;
; This has a divergent cycle due to the external divergent branch, but
; there are no divergent exits. Hence a use at X is not divergent
; unless the def itself is divergent.
;
; NOTE(review): the triple-S spelling "ASSSUMED" below presumably mirrors the
; text printed by the analysis; confirm against the printer before changing it.
;
; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK:   depth=1: entries(H B) C
; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
define amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %div.cond = icmp slt i32 %tid, 0
  %uni.cond = icmp slt i32 %a, 0
  ; Branch on the workitem-id comparison into the cycle at either B or H.
  br i1 %div.cond, label %B, label %H

H:                                                ; preds = %C, %entry
; CHECK: DIVERGENT:  %div.merge.h =
  %div.merge.h = phi i32 [ 0, %entry ], [ %b, %C ]
  br label %B

B:                                                ; preds = %H, %entry
; CHECK: DIVERGENT:  %div.merge.b =
  %div.merge.b = phi i32 [ %a, %H ], [ 1, %entry ]
; The negative patterns must carry the colon after DIVERGENT, as the positive
; patterns in this file do; without it they could never match and would
; verify nothing.
; CHECK-NOT: DIVERGENT:  %bb =
  %bb = add i32 %a, 1
; CHECK-NOT: DIVERGENT:  br i1 %uni.cond, label %X, label %C
  br i1 %uni.cond, label %X, label %C

C:                                                ; preds = %B
; CHECK-NOT: DIVERGENT:  %cc =
  %cc = add i32 %a, 1
; CHECK-NOT: DIVERGENT:  br i1 %uni.cond, label %X, label %H
  br i1 %uni.cond, label %X, label %H

; CHECK-LABEL: BLOCK X
X:                                                ; preds = %C, %B
; Both incoming values of %uni.merge.x are expected non-divergent above, yet
; the phi itself is expected to be reported divergent.
; CHECK: DIVERGENT:  %uni.merge.x =
  %uni.merge.x = phi i32 [ %bb, %B ], [ %cc, %C ]
; CHECK: DIVERGENT: %div.merge.x =
  %div.merge.x = phi i32 [ %div.merge.b, %B ], [ %cc, %C ]
  ret void
}
136
137declare i32 @llvm.amdgcn.workitem.id.x() #0
138
139attributes #0 = { nounwind readnone }
140