xref: /llvm-project/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s
2
3target datalayout = "n32" ; "n32" declares 32 bits as the native integer width for this module
4
5; CHECK-LABEL: @switch_unreachable_default
6; Kernel under test: switches on the workitem id with an unreachable default, then runs a single-block loop over the pointer selected by the switch.
7define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
8centry:
9  %tid = call i32 @llvm.amdgcn.workitem.id.x() ; switch selector; also reused as the store index in sw.exit
10  switch i32 %tid, label %sw.default [ ; any value other than 0 or 1 goes to sw.default
11    i32 0, label %sw.bb0
12    i32 1, label %sw.bb1
13  ]
14; Both case blocks only jump to sw.epilog; they differ solely in which incoming value the phi there selects.
15sw.bb0:
16  br label %sw.epilog
17
18sw.bb1:
19  br label %sw.epilog
20
21sw.default:
22  unreachable ; no successor, so this block contributes no incoming value to the phi in sw.epilog
23
24sw.epilog:
25  %ptr = phi ptr addrspace(1) [%in0, %sw.bb0], [%in1, %sw.bb1] ; %in0 for %tid == 0, %in1 for %tid == 1
26  br label %sw.while
27
28; The loop below is necessary to preserve the effect of the
29; unreachable default on divergence analysis in the presence of other
30; optimizations. The loop consists of a single block where the loop
31; exit is divergent because it depends on the divergent phi at the
32; start of the block. The checks below ensure that the loop exit is
33; handled correctly as divergent. But the data-flow within the block
34; is sensitive to optimizations; so we just ensure that the relevant
35; operations in the block body are indeed in the same block.
36; The patterns below match the IR dumped after si-annotate-control-flow; the two negative patterns forbid any branch between the phi, the load and the icmp.
37; CHECK: [[PHI:%[a-zA-Z0-9._]+]]  = phi i64
38; CHECK-NOT: {{ br }}
39; CHECK: load i8
40; CHECK-NOT: {{ br }}
41; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
42; CHECK: [[IF:%[a-zA-Z0-9._]+]]   = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
43; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
44; CHECK: br i1 [[LOOP]]
45; sw.while: load one byte per iteration starting at %ptr, stopping at the first zero byte and counting the iterations before it.
46sw.while:
47  %p = phi ptr addrspace(1) [ %ptr, %sw.epilog ], [ %incdec.ptr, %sw.while ] ; current byte address
48  %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ] ; iterations completed before this one
49  %char = load i8, ptr addrspace(1) %p, align 1
50  %tobool = icmp eq i8 %char, 0 ; true when the loaded byte is zero; this is the loop-exit condition
51  %incdec.ptr = getelementptr inbounds i8, ptr addrspace(1) %p, i64 1 ; advance by one byte
52  %count.inc = add i32 %count, 1
53  br i1 %tobool, label %sw.exit, label %sw.while ; leave on a zero byte, otherwise iterate again
54; sw.exit: write the count to the i32 slot of %out indexed by the workitem id.
55sw.exit:
56  %tid64 = zext i32 %tid to i64 ; widen the index to match the i64 GEP operand
57  %gep_out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid64
58  store i32 %count, ptr addrspace(1) %gep_out, align 4 ; %count excludes the iteration that saw the zero byte
59  ret void
60}
61
62declare i32 @llvm.amdgcn.workitem.id.x() #0 ; intrinsic called once in the kernel to obtain the switch selector
63; Attribute group #0 is attached to both the kernel definition and the intrinsic declaration above.
64attributes #0 = { nounwind readnone } ; NOTE(review): the kernel carrying #0 loads and stores memory, which looks at odds with readnone - confirm this is intentional
65attributes #1 = { convergent noinline optnone } ; NOTE(review): nothing in the visible file references #1 - possibly a leftover; confirm before removing
66