xref: /llvm-project/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll (revision 9b81548a6847937f194bf62033f295b8385d9b42)
1; RUN: llc < %s | FileCheck %s
2; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
3
4target triple = "nvptx64-nvidia-cuda"
5
6declare void @llvm.nvvm.barrier0()
7
; Verify that a load whose only use sits on one side of a later branch stays
; above an intervening barrier.  Per the original note on this test,
; syncthreads is modeled as maystore, so the load must not be sunk beneath the
; call.  The FileCheck directives below enforce that ordering: the ld.u32 has
; to be matched before the bar.sync.
define i32 @f(i32 %x, ptr %ptr, i1 %cond) {
entry:
  ; CHECK: ld.u32
  %loaded = load i32, ptr %ptr
  ; CHECK: bar.sync
  call void @llvm.nvvm.barrier0()
  br i1 %cond, label %use, label %join
use:
  ; Only consumer of %loaded; reached only when %cond is true.
  %biased = add i32 %loaded, 100
  br label %join
join:
  ; %x arrives straight from the entry block, %biased from the %use path.
  %merged = phi i32 [ %biased, %use ], [ %x, %entry ]
  %result = add i32 %merged, 1000
  ret i32 %result
}
26