; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
; RUN: %if ptxas %{ llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | %ptxas-verify %}

; Tail-duplication test for the NVPTX target. Both functions below have the
; same control-flow shape: two blocks (L1, L2) that each store to %a and then
; branch to a shared tail block (L42) containing a single call. The only
; difference is the callee in the tail block:
;   * @call_syncthreads calls the barrier intrinsic; exactly one bar.sync is
;     expected in the output.
;   * @call_foo calls an ordinary external function; two calls are expected,
;     which shows that the flags on the RUN line do cause the tail block to be
;     duplicated for this CFG shape.

target triple = "nvptx64-nvidia-cuda"

declare void @foo()
declare void @llvm.nvvm.barrier0()

; syncthreads shouldn't be duplicated.
; CHECK-LABEL: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
define void @call_syncthreads(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; The branch below starts a new unnamed block that no branch in this
  ; function targets.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, ptr %a
  br label %L42
L2:
  store i32 1, ptr %a
  br label %L42
L42:
  ; Shared tail reached from both L1 and L2; must appear once in the output.
  call void @llvm.nvvm.barrier0()
  br label %Ret
}

; Positive control: the same CFG with an ordinary call in the shared tail
; really does trigger tail duplication, so two calls appear in the output.
; CHECK-LABEL: .func call_foo
; CHECK: call
; CHECK: call
define void @call_foo(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; The branch below starts a new unnamed block that no branch in this
  ; function targets.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, ptr %a
  br label %L42
L2:
  store i32 1, ptr %a
  br label %L42
L42:
  ; Shared tail reached from both L1 and L2; expected to be duplicated.
  call void @foo()
  br label %Ret
}