1*ae77acebSpvanhout; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s 2*ae77acebSpvanhout 3*ae77acebSpvanhout; CHECK=LABEL: UniformityInfo for function 'basic': 4*ae77acebSpvanhout; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: 5*ae77acebSpvanhout; CHECK: CYCLES WITH DIVERGENT EXIT: 6*ae77acebSpvanhout; CHECK: depth=1: entries(P T) Q 7*ae77acebSpvanhoutdefine amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) { 8*ae77acebSpvanhoutentry: 9*ae77acebSpvanhout %cond.uni = icmp slt i32 %a, 0 10*ae77acebSpvanhout %tid = call i32 @llvm.amdgcn.workitem.id.x() 11*ae77acebSpvanhout %cond.div = icmp slt i32 %tid, 0 12*ae77acebSpvanhout br i1 %cond.uni, label %T, label %P 13*ae77acebSpvanhout 14*ae77acebSpvanhoutP: 15*ae77acebSpvanhout %pp.phi.1 = phi i32 [ %a, %entry], [ %b, %T ] 16*ae77acebSpvanhout %pp.phi.2 = phi i32 [ %a, %entry], [ %tt.phi, %T ] 17*ae77acebSpvanhout %pp = add i32 %b, 1 18*ae77acebSpvanhout br label %Q 19*ae77acebSpvanhout 20*ae77acebSpvanhoutQ: 21*ae77acebSpvanhout %qq = add i32 %b, 1 22*ae77acebSpvanhout %qq.div.1 = add i32 %pp.phi.2, 1 23*ae77acebSpvanhout %qq.div.2 = add i32 %pp.phi.2, 1 24*ae77acebSpvanhout br i1 %cond.div, label %T, label %exit 25*ae77acebSpvanhout 26*ae77acebSpvanhoutT: 27*ae77acebSpvanhout %tt.phi = phi i32 [ %qq, %Q ], [ %a, %entry ] 28*ae77acebSpvanhout %tt = add i32 %b, 1 29*ae77acebSpvanhout br label %P 30*ae77acebSpvanhout 31*ae77acebSpvanhoutexit: 32*ae77acebSpvanhout; CHECK: DIVERGENT: %ee.1 = 33*ae77acebSpvanhout; CHECK: DIVERGENT: %xx.2 = 34*ae77acebSpvanhout; CHECK-NOT: DIVERGENT: %ee.3 = 35*ae77acebSpvanhout %ee.1 = add i32 %pp.phi.1, 1 36*ae77acebSpvanhout %xx.2 = add i32 %pp.phi.2, 1 37*ae77acebSpvanhout %ee.3 = add i32 %b, 1 38*ae77acebSpvanhout ret void 39*ae77acebSpvanhout} 40*ae77acebSpvanhout 41*ae77acebSpvanhout; CHECK-LABEL: UniformityInfo for function 'outer_reducible': 42*ae77acebSpvanhout; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: 43*ae77acebSpvanhout; CHECK: CYCLES WITH DIVERGENT EXIT: 44*ae77acebSpvanhout; CHECK: depth=1: entries(H) P T R Q 45*ae77acebSpvanhoutdefine amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) { 46*ae77acebSpvanhoutentry: 47*ae77acebSpvanhout %cond.uni = icmp slt i32 %a, 0 48*ae77acebSpvanhout %tid = call i32 @llvm.amdgcn.workitem.id.x() 49*ae77acebSpvanhout %cond.div = icmp slt i32 %tid, 0 50*ae77acebSpvanhout br label %H 51*ae77acebSpvanhout 52*ae77acebSpvanhoutH: 53*ae77acebSpvanhout br i1 %cond.uni, label %T, label %P 54*ae77acebSpvanhout 55*ae77acebSpvanhoutP: 56*ae77acebSpvanhout %pp.phi.1 = phi i32 [ %a, %H], [ %b, %T ] 57*ae77acebSpvanhout %pp.phi.2 = phi i32 [ %a, %H], [ %tt.phi, %T ] 58*ae77acebSpvanhout %pp = add i32 %b, 1 59*ae77acebSpvanhout br label %Q 60*ae77acebSpvanhout 61*ae77acebSpvanhoutQ: 62*ae77acebSpvanhout %qq = add i32 %b, 1 63*ae77acebSpvanhout %qq.div.1 = add i32 %pp.phi.2, 1 64*ae77acebSpvanhout %qq.div.2 = add i32 %pp.phi.2, 1 65*ae77acebSpvanhout br i1 %cond.div, label %R, label %exit 66*ae77acebSpvanhout 67*ae77acebSpvanhoutR: 68*ae77acebSpvanhout br i1 %cond.uni, label %T, label %H 69*ae77acebSpvanhout 70*ae77acebSpvanhout 71*ae77acebSpvanhoutT: 72*ae77acebSpvanhout %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ] 73*ae77acebSpvanhout %tt = add i32 %b, 1 74*ae77acebSpvanhout br label %P 75*ae77acebSpvanhout 76*ae77acebSpvanhoutexit: 77*ae77acebSpvanhout; CHECK: DIVERGENT: %ee.1 = 78*ae77acebSpvanhout; CHECK: DIVERGENT: %xx.2 = 79*ae77acebSpvanhout; CHECK-NOT: DIVERGENT: %ee.3 = 80*ae77acebSpvanhout %ee.1 = add i32 %pp.phi.1, 1 81*ae77acebSpvanhout %xx.2 = add i32 %pp.phi.2, 1 82*ae77acebSpvanhout %ee.3 = add i32 %b, 1 83*ae77acebSpvanhout ret void 84*ae77acebSpvanhout} 85*ae77acebSpvanhout 86*ae77acebSpvanhout; entry(div) 87*ae77acebSpvanhout; | \ 88*ae77acebSpvanhout; H -> B 89*ae77acebSpvanhout; ^ /| 90*ae77acebSpvanhout; \--C | 91*ae77acebSpvanhout; \| 92*ae77acebSpvanhout; X 93*ae77acebSpvanhout; 94*ae77acebSpvanhout; This has a divergent cycle due to the external divergent branch, but 95*ae77acebSpvanhout; there are no divergent exits. Hence a use at X is not divergent 96*ae77acebSpvanhout; unless the def itself is divergent. 97*ae77acebSpvanhout; 98*ae77acebSpvanhout; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit': 99*ae77acebSpvanhout; CHECK: CYCLES ASSSUMED DIVERGENT: 100*ae77acebSpvanhout; CHECK: depth=1: entries(H B) C 101*ae77acebSpvanhout; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: 102*ae77acebSpvanhoutdefine amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 { 103*ae77acebSpvanhoutentry: 104*ae77acebSpvanhout %tid = call i32 @llvm.amdgcn.workitem.id.x() 105*ae77acebSpvanhout %div.cond = icmp slt i32 %tid, 0 106*ae77acebSpvanhout %uni.cond = icmp slt i32 %a, 0 107*ae77acebSpvanhout br i1 %div.cond, label %B, label %H 108*ae77acebSpvanhout 109*ae77acebSpvanhoutH: ; preds = %C, %entry 110*ae77acebSpvanhout; CHECK: DIVERGENT: %div.merge.h = 111*ae77acebSpvanhout %div.merge.h = phi i32 [ 0, %entry ], [ %b, %C ] 112*ae77acebSpvanhout br label %B 113*ae77acebSpvanhout 114*ae77acebSpvanhoutB: ; preds = %H, %entry 115*ae77acebSpvanhout; CHECK: DIVERGENT: %div.merge.b = 116*ae77acebSpvanhout %div.merge.b = phi i32 [ %a, %H ], [ 1, %entry ] 117*ae77acebSpvanhout; CHECK-NOT: DIVERGENT %bb = 118*ae77acebSpvanhout %bb = add i32 %a, 1 119*ae77acebSpvanhout; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %C 120*ae77acebSpvanhout br i1 %uni.cond, label %X, label %C 121*ae77acebSpvanhout 122*ae77acebSpvanhoutC: ; preds = %B 123*ae77acebSpvanhout; CHECK-NOT: DIVERGENT %cc = 124*ae77acebSpvanhout %cc = add i32 %a, 1 125*ae77acebSpvanhout; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %H 126*ae77acebSpvanhout br i1 %uni.cond, label %X, label %H 127*ae77acebSpvanhout 128*ae77acebSpvanhout; CHECK-LABEL: BLOCK X 129*ae77acebSpvanhoutX: ; preds = %C, %B 130*ae77acebSpvanhout; CHECK: DIVERGENT: %uni.merge.x = 131*ae77acebSpvanhout %uni.merge.x = phi i32 [ %bb, %B ], [%cc, %C ] 132*ae77acebSpvanhout; CHECK: DIVERGENT: %div.merge.x = 133*ae77acebSpvanhout %div.merge.x = phi i32 [ %div.merge.b, %B ], [%cc, %C ] 134*ae77acebSpvanhout ret void 135*ae77acebSpvanhout} 136*ae77acebSpvanhout 137*ae77acebSpvanhoutdeclare i32 @llvm.amdgcn.workitem.id.x() #0 138*ae77acebSpvanhout 139*ae77acebSpvanhoutattributes #0 = { nounwind readnone } 140