; xref: /llvm-project/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll (revision 9b81548a6847937f194bf62033f295b8385d9b42)
;
; Test driver: compile this module with llc at -O2 using -tail-dup-size=100
; and -enable-tail-merge=0, then match the output with FileCheck. When the
; ptxas feature is available, the same llc output is also piped through
; %ptxas-verify.
; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
; RUN: %if ptxas %{ llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"

; External callee used by @call_foo.
declare void @foo()
; Barrier intrinsic used by @call_syncthreads; the patterns for that function
; expect it to appear as bar.sync in the output.
declare void @llvm.nvvm.barrier0()

; syncthreads shouldn't be duplicated.
;
; Both store blocks (%L1 and %L2) branch to %L42, which contains the only
; call to the barrier intrinsic. The patterns below require one bar.sync in
; the emitted function and forbid a second one after it.
; CHECK: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
define void @call_syncthreads(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; This branch follows a terminator, so it opens a new unnamed block that no
  ; other block branches to. It is the only use of %cond2.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, ptr %a
  br label %L42
L2:
  store i32 1, ptr %a
  br label %L42
L42:
  ; Shared tail: the single barrier call reached from both %L1 and %L2.
  call void @llvm.nvvm.barrier0()
  br label %Ret
}

; Check that the control flow used in call_syncthreads really does trigger
; tail duplication when the shared tail calls an ordinary function (@foo)
; instead of the barrier: the output is expected to contain `call` twice.
;
; NOTE(review): a bare `call` pattern also matches any other output text that
; contains that substring (for example a call-sequence marker comment, if llc
; emits one). Confirm against real llc output that two matches imply two call
; instructions, and tighten the patterns if they do not.
; CHECK: .func call_foo
; CHECK: call
; CHECK: call
define void @call_foo(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; This branch follows a terminator, so it opens a new unnamed block that no
  ; other block branches to. It is the only use of %cond2.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, ptr %a
  br label %L42
L2:
  store i32 1, ptr %a
  br label %L42
L42:
  ; Shared tail: the single call to @foo reached from both %L1 and %L2.
  call void @foo()
  br label %Ret
}