; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

; These tests have identical control flow graphs with slight changes
; that affect cycle-info. There is a minor functional difference in
; the branch conditions; but that is not relevant to the tests.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The inner cycle has a header (P) that dominates the join, hence
;; both cycles are reported as converged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_b_p':
;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:

; CFG summary: `entry` jumps into the outer cycle at either A or B, and C
; jumps into the inner cycle (P, Q, R, S, T) at either T or P, so both
; cycles have two entry blocks. This variant is meant to have B as the
; outer header and P as the inner header.
define amdgpu_kernel void @headers_b_p(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Derived from the workitem id; used only by the branch in Q.
  %cond.div = icmp slt i32 %tid, 0
  %a.div = add i32 %tid, %a
  ; %B is listed last on purpose: headers_b_t uses this same ordering and its
  ; expected output prints entries(B A), i.e. B is the outer header
  ; (presumably because cycle discovery visits the last-listed successor
  ; first). Listing %A last would make this function identical to
  ; headers_a_p and leave the B-header case untested.
  br i1 %cond.uni, label %A, label %B

A:
  br label %B

B:
  br i1 %cond.uni, label %C, label %D

C:
  ; %P is listed last, matching the section comment that names P as the
  ; inner header.
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi = phi i32 [ %a, %C], [ %b, %T ]
  %pp = add i32 %b, 1
  br i1 %cond.uni, label %R, label %Q

Q:
  %qq = add i32 %b, 1
  ; The only branch on %cond.div; its two paths (Q->S and Q->R->S) rejoin at S.
  br i1 %cond.div, label %S, label %R

R:
  %rr = add i32 %b, 1
  br label %S

S:
  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
  %ss = add i32 %pp.phi, 1
  ; S leaves the inner cycle towards D or continues it through T.
  br i1 %cond.uni, label %D, label %T

D:
  ; D either leaves the outer cycle or closes it through A.
  br i1 %cond.uni, label %exit, label %A

T:
  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
  %tt = add i32 %b, 1
  br label %P

exit:
  %ee = add i32 %b, 1
  ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Same as previous, but the outer cycle has a different header (A).
;; The inner cycle has a header (P) that dominates the join, hence
;; both cycles are reported as converged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_a_p':
;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:

; CFG summary: `entry` jumps into the outer cycle at either A or B, and C
; jumps into the inner cycle (P, Q, R, S, T) at either T or P, so both
; cycles have two entry blocks.
define amdgpu_kernel void @headers_a_p(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Derived from the workitem id; used only by the branch in Q.
  %cond.div = icmp slt i32 %tid, 0
  %a.div = add i32 %tid, %a
  ; %A is listed last; headers_a_t uses the same ordering and its expected
  ; output prints entries(A B), i.e. A is the outer header. Do not reorder
  ; the successors.
  br i1 %cond.uni, label %B, label %A

A:
  br label %B

B:
  br i1 %cond.uni, label %C, label %D

C:
  ; %P is listed last, matching the section comment that names P as the
  ; inner header.
  br i1 %cond.uni, label %T, label %P

P:
  %pp.phi = phi i32 [ %a, %C], [ %b, %T ]
  %pp = add i32 %b, 1
  br i1 %cond.uni, label %R, label %Q

Q:
  %qq = add i32 %b, 1
  ; The only branch on %cond.div; its two paths (Q->S and Q->R->S) rejoin at S.
  br i1 %cond.div, label %S, label %R

R:
  %rr = add i32 %b, 1
  br label %S

S:
  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
  %ss = add i32 %pp.phi, 1
  ; S leaves the inner cycle towards D or continues it through T.
  br i1 %cond.uni, label %D, label %T

D:
  ; D either leaves the outer cycle or closes it through A.
  br i1 %cond.uni, label %exit, label %A

T:
  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
  %tt = add i32 %b, 1
  br label %P

exit:
  %ee = add i32 %b, 1
  ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The inner cycle has a header (T) that does not dominate the join.
;; The outer cycle has a header (B) that dominates the join. Hence
;; only the inner cycle is reported as diverged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_b_t':
;; CHECK: CYCLES ASSSUMED DIVERGENT:
;; CHECK:   depth=2: entries(T P) S Q R
;; CHECK: CYCLES WITH DIVERGENT EXIT:
;; CHECK:   depth=1: entries(B A) D T S Q P R C

; NOTE(review): the triple-S spelling in the directives above is used
; consistently throughout this file and is presumably the exact text the
; analysis prints -- confirm against the printer before "correcting" it.
;
; CFG summary: `entry` jumps into the outer cycle at either A or B, and C
; jumps into the inner cycle (P, Q, R, S, T) at either P or T, so both
; cycles have two entry blocks, as the entries(...) lists above show.
define amdgpu_kernel void @headers_b_t(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Derived from the workitem id; used only by the branch in Q.
  %cond.div = icmp slt i32 %tid, 0
  %a.div = add i32 %tid, %a
  ; %B is listed last; the expected output names B first among the outer
  ; entries, i.e. B is the outer header. Do not reorder the successors.
  br i1 %cond.uni, label %A, label %B

A:
  br label %B

B:
  br i1 %cond.uni, label %C, label %D

C:
  ; %T is listed last; the expected output names T first among the inner
  ; entries, i.e. T is the inner header. C can also reach P directly, so T
  ; does not dominate the join at S.
  br i1 %cond.uni, label %P, label %T

P:
  %pp.phi = phi i32 [ %a, %C], [ %b, %T ]
  %pp = add i32 %b, 1
  br i1 %cond.uni, label %R, label %Q

Q:
  %qq = add i32 %b, 1
  ; The only branch on %cond.div; its two paths (Q->S and Q->R->S) rejoin at S.
  br i1 %cond.div, label %S, label %R

R:
  %rr = add i32 %b, 1
  br label %S

S:
  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
  %ss = add i32 %pp.phi, 1
  ; S leaves the inner cycle towards D or continues it through T.
  br i1 %cond.uni, label %D, label %T

D:
  ; D either leaves the outer cycle or closes it through A.
  br i1 %cond.uni, label %exit, label %A

T:
  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
  %tt = add i32 %b, 1
  br label %P

exit:
  %ee = add i32 %b, 1
  ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The cycles have headers (A and T) that do not dominate the join.
;; Hence the outermost cycle is reported as diverged.
;;
;; CHECK-LABEL: UniformityInfo for function 'headers_a_t':
;; CHECK: CYCLES ASSSUMED DIVERGENT:
;; CHECK:   depth=1: entries(A B) D T S Q P R C
;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:

; NOTE(review): the triple-S spelling in the directives above is used
; consistently throughout this file and is presumably the exact text the
; analysis prints -- confirm against the printer before "correcting" it.
;
; CFG summary: `entry` jumps into the outer cycle at either A or B, and C
; jumps into the inner cycle (P, Q, R, S, T) at either P or T, so both
; cycles have two entry blocks.
define amdgpu_kernel void @headers_a_t(i32 %a, i32 %b, i32 %c) {
entry:
  %cond.uni = icmp slt i32 %a, 0
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Derived from the workitem id; used only by the branch in Q.
  %cond.div = icmp slt i32 %tid, 0
  %a.div = add i32 %tid, %a
  ; %A is listed last; the expected output names A first among the outer
  ; entries, i.e. A is the outer header. `entry` can also reach B directly,
  ; so A does not dominate the join at S. Do not reorder the successors.
  br i1 %cond.uni, label %B, label %A

A:
  br label %B

B:
  br i1 %cond.uni, label %C, label %D

C:
  ; %T is listed last, matching the section comment that names T as the
  ; inner header. C can also reach P directly, so T does not dominate the
  ; join at S either.
  br i1 %cond.uni, label %P, label %T

P:
  %pp.phi = phi i32 [ %a, %C], [ %b, %T ]
  %pp = add i32 %b, 1
  br i1 %cond.uni, label %R, label %Q

Q:
  %qq = add i32 %b, 1
  ; The only branch on %cond.div; its two paths (Q->S and Q->R->S) rejoin at S.
  br i1 %cond.div, label %S, label %R

R:
  %rr = add i32 %b, 1
  br label %S

S:
  %s.phi = phi i32 [ %qq, %Q ], [ %rr, %R ]
  %ss = add i32 %pp.phi, 1
  ; S leaves the inner cycle towards D or continues it through T.
  br i1 %cond.uni, label %D, label %T

D:
  ; D either leaves the outer cycle or closes it through A.
  br i1 %cond.uni, label %exit, label %A

T:
  %tt.phi = phi i32 [ %ss, %S ], [ %a, %C ]
  %tt = add i32 %b, 1
  br label %P

exit:
  %ee = add i32 %b, 1
  ret void
}

; Intrinsic returning the workitem id; every kernel in this file derives
; %cond.div from it.
declare i32 @llvm.amdgcn.workitem.id.x() #0