; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; Temporal-divergence tests for the uniformity analysis printer on AMDGPU.
;
; Naming convention used throughout this file:
;   %uni.*  values that the test does not want to see reported as DIVERGENT
;           (see the two negative directives at the top of every function).
;   %div.*  values or branches that the test expects to be reported as
;           DIVERGENT where a positive directive is present.
;
; Every kernel reads the work-item id into %tid and compares it against zero
; to build its divergent loop-exit condition.

; temporal-divergent use of value carried by divergent loop
;
; %uni.inc is computed inside loop %H, which is left through a branch on the
; %tid-derived condition. Its use in the exit block %X is expected to be
; reported as DIVERGENT.
define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'temporal_diverge':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br label %H

H:
  %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
; CHECK: DIVERGENT: %div.exitx =
  %div.exitx = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: br i1 %div.exitx,
  br i1 %div.exitx, label %X, label %H ; divergent branch

X:
; CHECK: DIVERGENT: %div.user =
  %div.user = add i32 %uni.inc, 5
  ret void
}

; The loop-carried value leaves loop %H through a phi placed directly in the
; exit block %X; %X is also reachable from %entry. The phi is expected to be
; reported as DIVERGENT.
define amdgpu_kernel void @phi_at_exit(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'phi_at_exit':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br i1 %uni.cond, label %H, label %X

H:
  %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
; CHECK: DIVERGENT: %div.exitx =
  %div.exitx = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: br i1 %div.exitx,
  br i1 %div.exitx, label %X, label %H ; divergent branch

X:
; CHECK: DIVERGENT: %div.phi =
  %div.phi = phi i32 [ 0, %entry], [ %uni.inc, %H ]
  %div.user = add i32 %div.phi, 5
  ret void
}

; Like the previous function, except that the phi receiving the loop-carried
; value sits in %Y, one block after the loop exit block %X. The phi is
; expected to be reported as DIVERGENT.
define amdgpu_kernel void @phi_after_exit(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'phi_after_exit':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br i1 %uni.cond, label %H, label %Y

H:
  %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
; CHECK: DIVERGENT: %div.exitx =
  %div.exitx = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: br i1 %div.exitx,
  br i1 %div.exitx, label %X, label %H ; divergent branch

X:
  br label %Y

Y:
; CHECK: DIVERGENT: %div.phi =
  %div.phi = phi i32 [ 0, %entry], [ %uni.inc, %X ]
  %div.user = add i32 %div.phi, 5
  ret void
}

; temporal-divergent use of value carried by divergent loop inside a top-level loop
;
; Loop %H is nested in the loop headed by %G, whose back edge in %X is
; controlled by %uni.cond. Uses of %uni.inc both inside the outer loop (%X)
; and after it (%Y) are expected to be reported as DIVERGENT.
define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'temporal_diverge_inloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br label %G

G:
  br label %H

H:
  %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
  %div.exitx = icmp slt i32 %tid, 0
  br i1 %div.exitx, label %X, label %H ; divergent branch
; CHECK: DIVERGENT: %div.exitx =
; CHECK: DIVERGENT: br i1 %div.exitx,

X:
; CHECK: DIVERGENT: %div.user =
  %div.user = add i32 %uni.inc, 5
  br i1 %uni.cond, label %G, label %Y

Y:
; CHECK: DIVERGENT: %div.alsouser =
  %div.alsouser = add i32 %uni.inc, 5
  ret void
}


; temporal-uniform use of a value, definition and users are carried by a
; surrounding divergent loop
;
; Here the inner loop %H exits on %uni.cond, while the surrounding loop headed
; by %G exits through the %tid-derived branch in %X. The use in %X
; (%uni.user) carries no DIVERGENT expectation; the use in %Y, after the
; outer loop, is expected to be reported as DIVERGENT.
define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'temporal_uniform_indivloop':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br label %G

G:
  br label %H

H:
  %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
  br i1 %uni.cond, label %X, label %H

X:
  %uni.user = add i32 %uni.inc, 5
  %div.exity = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.exity =
  br i1 %div.exity, label %G, label %Y
; CHECK: DIVERGENT: br i1 %div.exity,

Y:
  %div.alsouser = add i32 %uni.inc, 5
  ret void
; CHECK: DIVERGENT: %div.alsouser =
}


; temporal-divergent use of value carried by divergent loop, user is inside sibling loop
;
; Loop %H is followed by a second, separate loop on %G whose back edge is
; controlled by %uni.cond. The use of %uni.inc inside that second loop is
; expected to be reported as DIVERGENT.
define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'temporal_diverge_loopuser':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br label %H

H:
  %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
  %div.exitx = icmp slt i32 %tid, 0
  br i1 %div.exitx, label %X, label %H ; divergent branch
; CHECK: DIVERGENT: %div.exitx =
; CHECK: DIVERGENT: br i1 %div.exitx,

X:
  br label %G

G:
; NOTE(review): the next line has a space inside its prefix, so FileCheck does
; not treat it as a directive. The live expectation for %div.user follows the
; branch below. Presumably a leftover -- confirm and delete.
; C HECK: DIVERGENT: %div.user =
  %div.user = add i32 %uni.inc, 5
  br i1 %uni.cond, label %G, label %Y
; CHECK: DIVERGENT: %div.user =

Y:
  ret void
}

; temporal-divergent use of value carried by divergent loop, user is inside
; sibling loop, defs and use are carried by a uniform loop
;
; Block %X loops on itself when %uni.cond is true and otherwise branches back
; to %G, so both %H and the %X self-loop sit inside the loop headed by %G.
; The use of %uni.inc in %X is expected to be reported as DIVERGENT.
define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'temporal_diverge_loopuser_nested':
; CHECK-NOT: DIVERGENT: %uni.
; CHECK-NOT: DIVERGENT: br i1 %uni.

entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %uni.cond = icmp slt i32 %a, 0
  br label %G

G:
  br label %H

H:
  %uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
  %uni.inc = add i32 %uni.merge.h, 1
  %div.exitx = icmp slt i32 %tid, 0
  br i1 %div.exitx, label %X, label %H ; divergent branch
; CHECK: DIVERGENT: %div.exitx =
; CHECK: DIVERGENT: br i1 %div.exitx,

X:
; CHECK: DIVERGENT: %div.user =
  %div.user = add i32 %uni.inc, 5
  br i1 %uni.cond, label %X, label %G

; NOTE(review): no branch in this function targets %Y, so this block has no
; predecessors. Confirm whether the branch in %X was meant to exit here.
Y:
  ret void
}

; Work-item id intrinsic that every kernel above reads into %tid.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group shared by the kernels and the intrinsic declaration.
attributes #0 = { nounwind readnone }