; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; Uniformity-analysis expectations for three kernels whose cycles are either
; exited or entered through a branch on a condition computed from the
; workitem id.
;
; NOTE(review): the spelling "ASSSUMED" (three S) in the patterns below is
; reproduced verbatim; presumably it mirrors the text emitted by the analysis
; printer -- verify before "correcting" it.

; @basic
;   %entry branches on %cond.uni (computed from kernel argument %a) to either
;   %T or %P, and %T -> %P -> %Q -> %T closes the cycle, so the cycle is
;   entered at two different blocks. %Q leaves the cycle on %cond.div
;   (computed from the workitem id). The expectations below require the cycle
;   to be listed as having a divergent exit and not as assumed divergent.
;
; CHECK-LABEL: UniformityInfo for function 'basic':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(P T) Q
define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond.div = icmp slt i32 %tid, 0
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi.1 = phi i32 [ %a, %entry ], [ %b, %T ]
  %pp.phi.2 = phi i32 [ %a, %entry ], [ %tt.phi, %T ]
  %pp = add i32 %b, 1
  br label %Q

Q:
  %qq = add i32 %b, 1
  %qq.div.1 = add i32 %pp.phi.2, 1
  %qq.div.2 = add i32 %pp.phi.2, 1
  br i1 %cond.div, label %T, label %exit

T:
  %tt.phi = phi i32 [ %qq, %Q ], [ %a, %entry ]
  %tt = add i32 %b, 1
  br label %P

exit:
; Uses outside the cycle of the phis defined in %P are expected to be
; divergent; the use of the plain argument %b is not.
; CHECK: DIVERGENT: %ee.1 =
; CHECK: DIVERGENT: %xx.2 =
; CHECK-NOT: DIVERGENT: %ee.3 =
  %ee.1 = add i32 %pp.phi.1, 1
  %xx.2 = add i32 %pp.phi.2, 1
  %ee.3 = add i32 %b, 1
  ret void
}

; @outer_reducible
;   Same inner shape as @basic, but every path into the cycle goes through %H
;   (%entry -> %H), and %R can branch back to %H. The expected cycle therefore
;   has the single entry %H and contains %P, %T, %R and %Q. %Q leaves the
;   cycle on %cond.div.
;
; CHECK-LABEL: UniformityInfo for function 'outer_reducible':
; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
; CHECK: CYCLES WITH DIVERGENT EXIT:
; CHECK: depth=1: entries(H) P T R Q
define amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cond.div = icmp slt i32 %tid, 0
  br label %H

H:
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi.1 = phi i32 [ %a, %H ], [ %b, %T ]
  %pp.phi.2 = phi i32 [ %a, %H ], [ %tt.phi, %T ]
  %pp = add i32 %b, 1
  br label %Q

Q:
  %qq = add i32 %b, 1
  %qq.div.1 = add i32 %pp.phi.2, 1
  %qq.div.2 = add i32 %pp.phi.2, 1
  br i1 %cond.div, label %R, label %exit

R:
  br i1 %cond.uni, label %T, label %H

T:
  %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ]
  %tt = add i32 %b, 1
  br label %P

exit:
; CHECK: DIVERGENT: %ee.1 =
; CHECK: DIVERGENT: %xx.2 =
; CHECK-NOT: DIVERGENT: %ee.3 =
  %ee.1 = add i32 %pp.phi.1, 1
  %xx.2 = add i32 %pp.phi.2, 1
  %ee.3 = add i32 %b, 1
  ret void
}

;      entry(div)
;      |   \
;      H -> B
;      ^   /|
;      \--C |
;          \|
;           X
;
; This has a divergent cycle due to the external divergent branch, but
; there are no divergent exits. Hence a use at X is not divergent
; unless the def itself is divergent.
;
; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit':
; CHECK: CYCLES ASSSUMED DIVERGENT:
; CHECK: depth=1: entries(H B) C
; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
define amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %div.cond = icmp slt i32 %tid, 0
  %uni.cond = icmp slt i32 %a, 0
  br i1 %div.cond, label %B, label %H

H:                                                ; preds = %C, %entry
; CHECK: DIVERGENT: %div.merge.h =
  %div.merge.h = phi i32 [ 0, %entry ], [ %b, %C ]
  br label %B

B:                                                ; preds = %H, %entry
; CHECK: DIVERGENT: %div.merge.b =
  %div.merge.b = phi i32 [ %a, %H ], [ 1, %entry ]
; NOTE(review): the patterns for %bb here and %cc in block C spell DIVERGENT
; without the trailing ':' that every other pattern in this file uses, so they
; may never match any output line. Confirm the intended expectation against
; real tool output before tightening them.
; CHECK-NOT: DIVERGENT %bb =
  %bb = add i32 %a, 1
; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %C
  br i1 %uni.cond, label %X, label %C

C:                                                ; preds = %B
; CHECK-NOT: DIVERGENT %cc =
  %cc = add i32 %a, 1
; CHECK-NOT: DIVERGENT: br i1 %uni.cond, label %X, label %H
  br i1 %uni.cond, label %X, label %H

; CHECK-LABEL: BLOCK X
X:                                                ; preds = %C, %B
; NOTE(review): despite its name, %uni.merge.x is expected to be reported as
; divergent; its incoming values %bb and %cc are defined inside the cycle.
; Confirm this expectation is intended.
; CHECK: DIVERGENT: %uni.merge.x =
  %uni.merge.x = phi i32 [ %bb, %B ], [ %cc, %C ]
; CHECK: DIVERGENT: %div.merge.x =
  %div.merge.x = phi i32 [ %div.merge.b, %B ], [ %cc, %C ]
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }