1; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s 2 3; 4; Entry 5; | 6; v 7; -------->H--------- 8; | | | 9; | v | 10; | --->T---- | 11; | | | | 12; | | V | 13; S<---R P <--- 14; ^ ^ | 15; | | Div | 16; | --- Q <-- 17; | | 18; | v 19; -------- U 20; | 21; v 22; Exit 23; 24; The divergent branch is at Q that exits an irreducible cycle with 25; entries T and P nested inside a reducible cycle with header H. R is 26; assigned label R, which reaches P. S is a join node with label S. If 27; this is propagated to P via H, then P is incorrectly recognized as a 28; join, making the inner cycle divergent. P is always executed 29; convergently -- either by threads that reconverged at header H, or 30; by threads that are still executing the inner cycle. Thus, any PHI 31; at P should not be marked divergent. 32 33define amdgpu_kernel void @nested_irreducible(i32 %a, i32 %b, i32 %c) { 34; CHECK=LABEL: UniformityInfo for function 'nested_irreducible': 35; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: 36; CHECK: CYCLES WITH DIVERGENT EXIT: 37; CHECK-DAG: depth=2: entries(P T) R Q 38; CHECK-DAG: depth=1: entries(H) S P T R Q U 39entry: 40 %cond.uni = icmp slt i32 %a, 0 41 %tid = call i32 @llvm.amdgcn.workitem.id.x() 42 %cond.div = icmp slt i32 %tid, 0 43 br label %H 44 45H: 46 br i1 %cond.uni, label %T, label %P 47 48P: 49; CHECK-LABEL: BLOCK P 50; CHECK-NOT: DIVERGENT: %pp.phi = 51; CHECK-NOT: DIVERGENT: %pp = 52 %pp.phi = phi i32 [ %a, %H], [ %b, %T ] 53 %pp = add i32 %b, 1 54 br label %Q 55 56Q: 57; CHECK-LABEL: BLOCK Q 58; CHECK-NOT: DIVERGENT: %qq = 59; CHECK-NOT: DIVERGENT: %qq.uni = 60 %qq = add i32 %b, 1 61 %qq.uni = add i32 %pp.phi, 1 62 br i1 %cond.div, label %R, label %U 63 64R: 65 br i1 %cond.uni, label %S, label %T 66 67T: 68; CHECK-LABEL: BLOCK T 69; CHECK-NOT: DIVERGENT: %tt.phi = 70; CHECK-NOT: DIVERGENT: %tt = 71 %tt.phi = phi i32 [ %qq, %R ], [ %a, %H ] 72 %tt = add i32 %b, 1 73 br label %P 74 75S: 76; CHECK-LABEL: BLOCK S 77; CHECK: DIVERGENT: %ss.phi = 78; CHECK-NOT: DIVERGENT: %ss = 79 %ss.phi = phi i32 [ %qq.uni, %U ], [ %a, %R ] 80 %ss = add i32 %b, 1 81 br label %H 82 83U: 84 br i1 %cond.uni, label %S, label %exit 85 86exit: 87; CHECK: DIVERGENT: %ee.div = 88; CHECK-NOT: DIVERGENT: %ee = 89 %ee.div = add i32 %qq.uni, 1 90 %ee = add i32 %b, 1 91 ret void 92} 93 94; 95; Entry 96; | 97; v 98; -->-------->H--------- 99; | ^ | | 100; | | | | 101; | | | | 102; | | | | 103; | | v V 104; | R<-------T-->U--->P 105; | Div | 106; | | 107; ----------- Q <------- 108; | 109; v 110; Exit 111; 112; This is a reducible cycle with a divergent branch at T. Disjoint 113; paths eventually join at the header H, which is assigned label H. 114; Node P is assigned label U. If the header label were propagated to 115; P, it will be incorrectly recgonized as a join. P is always executed 116; convergently -- either by threads that reconverged at header H, or 117; by threads that diverged at T (and eventually reconverged at H). 118; Thus, any PHI at P should not be marked divergent. 119 120define amdgpu_kernel void @header_label_1(i32 %a, i32 %b, i32 %c) { 121; CHECK=LABEL: UniformityInfo for function 'header_label_1': 122; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: 123; CHECK: CYCLES WITH DIVERGENT EXIT: 124; CHECK: depth=1: entries(H) Q P U T R 125entry: 126 %cond.uni = icmp slt i32 %a, 0 127 %tid = call i32 @llvm.amdgcn.workitem.id.x() 128 %cond.div = icmp slt i32 %tid, 0 129 br label %H 130 131H: 132 br i1 %cond.uni, label %T, label %P 133 134P: 135; CHECK-LABEL: BLOCK P 136; CHECK-NOT: DIVERGENT: %pp.phi = 137; CHECK-NOT: DIVERGENT: %pp = 138 %pp.phi = phi i32 [ %a, %H], [ %b, %U ] 139 %pp = add i32 %b, 1 140 br label %Q 141 142Q: 143; CHECK-LABEL: BLOCK Q 144; CHECK-NOT: DIVERGENT: %qq = 145; CHECK-NOT: DIVERGENT: %qq.uni = 146 %qq = add i32 %b, 1 147 %qq.uni = add i32 %pp.phi, 1 148 br i1 %cond.uni, label %exit, label %H 149 150R: 151 br label %H 152 153T: 154 br i1 %cond.div, label %R, label %U 155 156U: 157 br label %P 158 159exit: 160; CHECK-LABEL: BLOCK exit 161; CHECK: DIVERGENT: %ee.div = 162; CHECK-NOT: DIVERGENT: %ee = 163 %ee.div = add i32 %qq.uni, 1 164 %ee = add i32 %b, 1 165 ret void 166} 167 168; entry 169; | 170; --> H1 171; | | \ 172; | | H2(div) 173; | \ / \ 174; | B C 175; ^ \ / 176; \------D 177; | 178; X 179; 180; This is a reducible cycle with a divergent branch at H2. Disjoint 181; paths eventually join at the header D, which is assigned label D. 182; Node B is assigned label B. If the header label D were propagated to 183; B, it will be incorrectly recgonized as a join. B is always executed 184; convergently -- either by threads that reconverged at header H1, or 185; by threads that diverge at H2 (and eventually reconverged at H1). 186; Thus, any PHI at B should not be marked divergent. 187 188define amdgpu_kernel void @header_label_2(i32 %a, i32 %b, i32 %c) { 189; CHECK-LABEL: UniformityInfo for function 'header_label_2': 190; CHECK-NOT: CYCLES ASSSUMED DIVERGENT: 191; CHECK-NOT: CYCLES WITH DIVERGENT EXIT: 192entry: 193 %cond.uni = icmp slt i32 %a, 0 194 %tid = call i32 @llvm.amdgcn.workitem.id.x() 195 %cond.div = icmp slt i32 %tid, 0 196 br label %H1 197 198H1: 199 br i1 %cond.uni, label %B, label %H2 200 201H2: 202 br i1 %cond.div, label %B, label %C 203 204B: 205; CHECK-LABEL: BLOCK B 206; CHECK-NOT: DIVERGENT: %bb.phi = 207 %bb.phi = phi i32 [%a, %H1], [%b, %H2] 208 br label %D 209 210C: 211 br label %D 212 213D: 214; CHECK-LABEL: BLOCK D 215; CHECK: DIVERGENT: %dd.phi = 216 %dd.phi = phi i32 [%a, %B], [%b, %C] 217 br i1 %cond.uni, label %exit, label %H1 218 219exit: 220 %ee.1 = add i32 %dd.phi, 1 221 %ee.2 = add i32 %b, 1 222 ret void 223} 224 225declare i32 @llvm.amdgcn.workitem.id.x() #0 226