; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s

; Opaque external callees; each loop arm calls a different one so the arms
; stay distinguishable in the output.
declare i32 @a()
declare i32 @b()
declare i32 @c()

; Case 1: plain function. The loop branches on two loop-invariant arguments
; (%cond1, then %cond2). Both conditions are divergent here, so non-trivial
; unswitching must leave the loop untouched; the expectations below simply
; mirror the input.
define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
; CHECK: loop_exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop_begin

loop_begin:
  br i1 %cond1, label %loop_a, label %loop_b

loop_a:
  %unused.a = call i32 @a()
  br label %latch

loop_b:
  br i1 %cond2, label %loop_b_a, label %loop_b_b

loop_b_a:
  %unused.b = call i32 @b()
  br label %latch

loop_b_b:
  %unused.c = call i32 @c()
  br label %latch

latch:
  %v = load i1, ptr %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
}

; Case 2: same loop, but as an amdgpu_kernel. The two conditions are known to
; be uniform, so unswitching would be legal in principle. Unswitch does not
; currently consult UniformityAnalysis, though, so the loop is still expected
; to come out unchanged.
define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1_uniform(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
; CHECK: loop_exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop_begin

loop_begin:
  br i1 %cond1, label %loop_a, label %loop_b

loop_a:
  %unused.a = call i32 @a()
  br label %latch

loop_b:
  br i1 %cond2, label %loop_b_a, label %loop_b_b

loop_b_a:
  %unused.b = call i32 @b()
  br label %latch

loop_b_b:
  %unused.c = call i32 @c()
  br label %latch

latch:
  %v = load i1, ptr %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
}

; Case 3: same loop again, with attribute group #0 pinning the flat workgroup
; size to exactly 1. With a single lane there is nothing to diverge from, so
; both conditions are expected to be unswitched, yielding three specialized
; copies of the loop.
define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
; CHECK-LABEL: @test1_single_lane_execution(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
;
; Copy specialized for the 'loop_a' arm (%cond1 true).
;
; CHECK: entry.split.us:
; CHECK-NEXT: br label %loop_begin.us
;
; CHECK: loop_begin.us:
; CHECK-NEXT: br label %loop_a.us
;
; CHECK: loop_a.us:
; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch.us
;
; CHECK: latch.us:
; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK: loop_exit.split.us:
; CHECK-NEXT: br label %loop_exit
;
; The hoisted test of the second condition (%cond2).
;
; CHECK: entry.split:
; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
;
; Copy specialized for the 'loop_b_a' arm (%cond2 true).
;
; CHECK: entry.split.split.us:
; CHECK-NEXT: br label %loop_begin.us1
;
; CHECK: loop_begin.us1:
; CHECK-NEXT: br label %loop_b.us
;
; CHECK: loop_b.us:
; CHECK-NEXT: br label %loop_b_a.us
;
; CHECK: loop_b_a.us:
; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch.us2
;
; CHECK: latch.us2:
; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
;
; CHECK: loop_exit.split.split.us:
; CHECK-NEXT: br label %loop_exit.split
;
; Remaining copy, specialized for the 'loop_b_b' arm (%cond2 false).
;
; CHECK: entry.split.split:
; CHECK-NEXT: br label %loop_begin
;
; CHECK: loop_begin:
; CHECK-NEXT: br label %loop_b
;
; CHECK: loop_b:
; CHECK-NEXT: br label %loop_b_b
;
; CHECK: loop_b_b:
; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %latch
;
; CHECK: latch:
; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
;
; CHECK: loop_exit.split.split:
; CHECK-NEXT: br label %loop_exit.split
;
; Exit blocks shared by the specialized copies.
;
; CHECK: loop_exit.split:
; CHECK-NEXT: br label %loop_exit
;
; CHECK: loop_exit:
; CHECK-NEXT: ret
;
entry:
  br label %loop_begin

loop_begin:
  br i1 %cond1, label %loop_a, label %loop_b

loop_a:
  call i32 @a()
  br label %latch

loop_b:
  br i1 %cond2, label %loop_b_a, label %loop_b_b

loop_b_a:
  call i32 @b()
  br label %latch

loop_b_b:
  call i32 @c()
  br label %latch

latch:
  %v = load i1, ptr %ptr
  br i1 %v, label %loop_begin, label %loop_exit

loop_exit:
  ret void
}

attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }