; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s

target datalayout = "n32"

; Verifies the IR printed after si-annotate-control-flow for a kernel whose
; switch on the workitem id has an unreachable default destination.
;
; Kernel outline:
;   * the switch picks %in0 (tid == 0) or %in1 (tid == 1); the default
;     destination is unreachable,
;   * the selected pointer is walked one byte at a time until a zero byte is
;     loaded, counting the iterations,
;   * the count observed on the exiting iteration is stored to %out[tid].

; CHECK-LABEL: @switch_unreachable_default

define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
centry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tid, label %sw.default [
    i32 0, label %sw.bb0
    i32 1, label %sw.bb1
  ]

sw.bb0:
  br label %sw.epilog

sw.bb1:
  br label %sw.epilog

sw.default:
  unreachable

sw.epilog:
  ; Only the two switch cases feed this phi; the unreachable default
  ; contributes no incoming value.
  %ptr = phi ptr addrspace(1) [%in0, %sw.bb0], [%in1, %sw.bb1]
  br label %sw.while

; The loop below is necessary to preserve the effect of the
; unreachable default on divergence analysis in the presence of other
; optimizations. The loop consists of a single block where the loop
; exit is divergent because it depends on the divergent phi at the
; start of the block. The checks below ensure that the loop exit is
; handled correctly as divergent. But the data-flow within the block
; is sensitive to optimizations; so we just ensure that the relevant
; operations in the block body are indeed in the same block.

; CHECK: [[PHI:%[a-zA-Z0-9._]+]] = phi i64
; CHECK-NOT: {{ br }}
; CHECK: load i8
; CHECK-NOT: {{ br }}
; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
; CHECK: br i1 [[LOOP]]

sw.while:
  %p = phi ptr addrspace(1) [ %ptr, %sw.epilog ], [ %incdec.ptr, %sw.while ]
  %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
  %char = load i8, ptr addrspace(1) %p, align 1
  ; Leave the loop once a zero byte has been read.
  %tobool = icmp eq i8 %char, 0
  %incdec.ptr = getelementptr inbounds i8, ptr addrspace(1) %p, i64 1
  %count.inc = add i32 %count, 1
  br i1 %tobool, label %sw.exit, label %sw.while

sw.exit:
  ; %count is the phi value of the exiting iteration, i.e. the number of
  ; non-zero bytes read before the terminating zero byte.
  %tid64 = zext i32 %tid to i64
  %gep_out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid64
  store i32 %count, ptr addrspace(1) %gep_out, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

; The kernel loads from and stores to global memory, so it must not carry
; readnone; that attribute is kept for the workitem-id intrinsic declaration
; only.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }