xref: /llvm-project/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll (revision ae77aceba5ad6ee575d3d79eb0259624322b19f4)
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
2*ae77acebSpvanhout
; Function 'basic'
;
; Blocks P, Q and T form a cycle. The entry block branches on %cond.uni to
; either T or P, so the cycle can be entered at two different blocks. The only
; way out is the branch in Q, whose condition %cond.div is computed from the
; work-item id. The directives below expect the cycle to be listed as having a
; divergent exit and not as assumed divergent.
;
; NOTE(review): the spelling "ASSSUMED" presumably mirrors the text emitted by
; the analysis printer; confirm against the printer before "correcting" it.
;
; CHECK-LABEL: UniformityInfo for function 'basic':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK:   depth=1: entries(P T) Q
define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0               ; depends only on kernel argument %a
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond.div = icmp slt i32 %tid, 0             ; depends on the work-item id
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi.1 = phi i32 [ %a, %entry ], [ %b, %T ]
  %pp.phi.2 = phi i32 [ %a, %entry ], [ %tt.phi, %T ]
  %pp = add i32 %b, 1
  br label %Q

Q:
  %qq = add i32 %b, 1
  %qq.div.1 = add i32 %pp.phi.2, 1
  %qq.div.2 = add i32 %pp.phi.2, 1
  ; The only exit of the cycle; its condition comes from the work-item id.
  br i1 %cond.div, label %T, label %exit

T:
  %tt.phi = phi i32 [ %qq, %Q ], [ %a, %entry ]
  %tt = add i32 %b, 1
  br label %P

exit:
; Uses outside the cycle: the two values that read the phis defined in P are
; expected to be reported as divergent; %ee.3 reads only %b and must not be.
; CHECK:   DIVERGENT:   %ee.1 =
; CHECK:   DIVERGENT:   %xx.2 =
; CHECK-NOT: DIVERGENT:     %ee.3 =
  %ee.1 = add i32 %pp.phi.1, 1
  %xx.2 = add i32 %pp.phi.2, 1
  %ee.3 = add i32 %b, 1
  ret void
}
40*ae77acebSpvanhout
; Function 'outer_reducible'
;
; Same P/Q/T shape as a two-entry cycle, but wrapped in an outer cycle whose
; single entry is H: entry jumps unconditionally to H, and R can branch back
; to H. The exit to the outside is still the branch in Q on %cond.div, which
; is computed from the work-item id. The directives expect the outer cycle
; (header H) to be listed as having a divergent exit and not as assumed
; divergent.
;
; CHECK-LABEL: UniformityInfo for function 'outer_reducible':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK:   depth=1: entries(H) P T R Q
define amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0               ; depends only on kernel argument %a
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond.div = icmp slt i32 %tid, 0             ; depends on the work-item id
  br label %H

H:
  ; Single entry of the outer cycle; dispatches into the inner P/T pair.
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi.1 = phi i32 [ %a, %H ], [ %b, %T ]
  %pp.phi.2 = phi i32 [ %a, %H ], [ %tt.phi, %T ]
  %pp = add i32 %b, 1
  br label %Q

Q:
  %qq = add i32 %b, 1
  %qq.div.1 = add i32 %pp.phi.2, 1
  %qq.div.2 = add i32 %pp.phi.2, 1
  ; Leaves the cycle towards exit, or continues to R.
  br i1 %cond.div, label %R, label %exit

R:
  ; Back edge to H, or straight back into T.
  br i1 %cond.uni, label %T, label %H

T:
  %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ]
  %tt = add i32 %b, 1
  br label %P

exit:
; The two values that read the phis defined in P are expected to be reported
; as divergent; %ee.3 reads only %b and must not be.
; CHECK:   DIVERGENT:   %ee.1 =
; CHECK:   DIVERGENT:   %xx.2 =
; CHECK-NOT: DIVERGENT:     %ee.3 =
  %ee.1 = add i32 %pp.phi.1, 1
  %xx.2 = add i32 %pp.phi.2, 1
  %ee.3 = add i32 %b, 1
  ret void
}
85*ae77acebSpvanhout
;      entry(div)
;      |   \
;      H -> B
;      ^   /|
;      \--C |
;          \|
;           X
;
; The cycle {H, B, C} is entered through a branch in entry whose condition is
; computed from the work-item id, so it is expected to be listed as assumed
; divergent. Both branches that leave the cycle (in B and in C) use %uni.cond,
; which is computed from kernel argument %a only, so no divergent exit is
; expected. Hence a use at X is not divergent unless the def itself is
; divergent.
;
; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK:   depth=1: entries(H B) C
; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
define amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %div.cond = icmp slt i32 %tid, 0             ; depends on the work-item id
  %uni.cond = icmp slt i32 %a, 0               ; depends only on kernel argument %a
  br i1 %div.cond, label %B, label %H

H:                                                ; preds = %C, %entry
; CHECK: DIVERGENT:  %div.merge.h =
  %div.merge.h = phi i32 [ 0, %entry ], [ %b, %C ]
  br label %B

B:                                                ; preds = %H, %entry
; CHECK: DIVERGENT:  %div.merge.b =
  %div.merge.b = phi i32 [ %a, %H ], [ 1, %entry ]
; The negative patterns for %bb and %cc carry the ':' that follows DIVERGENT
; in every other pattern of this file; without it they could never match and
; would guard nothing.
; CHECK-NOT: DIVERGENT:  %bb =
  %bb = add i32 %a, 1
; CHECK-NOT: DIVERGENT:  br i1 %uni.cond, label %X, label %C
  br i1 %uni.cond, label %X, label %C

C:                                                ; preds = %B
; CHECK-NOT: DIVERGENT:  %cc =
  %cc = add i32 %a, 1
; CHECK-NOT: DIVERGENT:  br i1 %uni.cond, label %X, label %H
  br i1 %uni.cond, label %X, label %H

; CHECK-LABEL: BLOCK X
X:                                                ; preds = %C, %B
; NOTE(review): %uni.merge.x is expected to be reported divergent although
; both incoming values (%bb, %cc) are computed from %a only; confirm that this
; conservative result is the intended expectation.
; CHECK: DIVERGENT:  %uni.merge.x =
  %uni.merge.x = phi i32 [ %bb, %B ], [ %cc, %C ]
; CHECK: DIVERGENT: %div.merge.x =
  %div.merge.x = phi i32 [ %div.merge.b, %B ], [ %cc, %C ]
  ret void
}
136*ae77acebSpvanhout
; AMDGPU work-item id (x) intrinsic. Every function in this file derives its
; work-item-dependent branch condition from a call to it.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group #0, attached to the intrinsic declaration above and to
; @no_divergent_exit.
attributes #0 = { nounwind readnone }
140