xref: /llvm-project/llvm/test/CodeGen/NVPTX/elect.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s
3; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %}
4
5target triple = "nvptx64-nvidia-cuda"
6
; Intrinsic under test. As declared here it takes a single i32 operand and
; returns an {i32, i1} pair; every function in this file returns that pair
; unchanged so both fields show up in the generated PTX.
7declare {i32, i1} @llvm.nvvm.elect.sync(i32)
8
; Register-operand case: the i32 argument %mask is forwarded directly to the
; intrinsic and the resulting {i32, i1} aggregate is returned as-is.
; The assertions below expect:
;   - the mask to be loaded from the parameter space into a 32-bit register,
;   - exactly one `elect.sync` with a `%r|%p` destination pair fed by that
;     register,
;   - the i32 field stored as b32 at func_retval0, and the i1 field turned
;     into a 16-bit -1/0 value via selp and stored as b8 at func_retval0+4.
9define {i32, i1} @elect_sync(i32 %mask) {
10; CHECK-LABEL: elect_sync(
11; CHECK:       {
12; CHECK-NEXT:    .reg .pred %p<2>;
13; CHECK-NEXT:    .reg .b16 %rs<2>;
14; CHECK-NEXT:    .reg .b32 %r<3>;
15; CHECK-EMPTY:
16; CHECK-NEXT:  // %bb.0:
17; CHECK-NEXT:    ld.param.u32 %r1, [elect_sync_param_0];
18; CHECK-NEXT:    elect.sync %r2|%p1, %r1;
19; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
20; CHECK-NEXT:    selp.u16 %rs1, -1, 0, %p1;
21; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs1;
22; CHECK-NEXT:    ret;
23  %val = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
24  ret {i32, i1} %val
25}
26
; Immediate-operand case: the intrinsic is called with the constant
; u0xffffffff instead of a function argument.
; The assertions below expect the constant to be emitted inline as the
; immediate `-1` operand of `elect.sync`, with no parameter load in the
; block and only the result register declared in the b32 register file.
; The return-value stores follow the same b32 + b8 layout at func_retval0
; and func_retval0+4.
27define {i32, i1} @elect_sync_imm() {
28; CHECK-LABEL: elect_sync_imm(
29; CHECK:       {
30; CHECK-NEXT:    .reg .pred %p<2>;
31; CHECK-NEXT:    .reg .b16 %rs<2>;
32; CHECK-NEXT:    .reg .b32 %r<2>;
33; CHECK-EMPTY:
34; CHECK-NEXT:  // %bb.0:
35; CHECK-NEXT:    elect.sync %r1|%p1, -1;
36; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
37; CHECK-NEXT:    selp.u16 %rs1, -1, 0, %p1;
38; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs1;
39; CHECK-NEXT:    ret;
40  %val = call {i32, i1} @llvm.nvvm.elect.sync(i32 u0xffffffff)
41  ret {i32, i1} %val
42}
43
44; When there are two elect.sync's make sure that
45; the second one is not optimized away.
;
; %val2 is intentionally left unused: only %val is returned. The assertions
; below still expect two `elect.sync` instructions reading the same mask
; register, with separate destination pairs (%r2|%p1 and %r3|%p2), while
; only the first pair is written to the return value.
46define {i32, i1} @elect_sync_twice(i32 %mask) {
47; CHECK-LABEL: elect_sync_twice(
48; CHECK:       {
49; CHECK-NEXT:    .reg .pred %p<3>;
50; CHECK-NEXT:    .reg .b16 %rs<2>;
51; CHECK-NEXT:    .reg .b32 %r<4>;
52; CHECK-EMPTY:
53; CHECK-NEXT:  // %bb.0:
54; CHECK-NEXT:    ld.param.u32 %r1, [elect_sync_twice_param_0];
55; CHECK-NEXT:    elect.sync %r2|%p1, %r1;
56; CHECK-NEXT:    elect.sync %r3|%p2, %r1;
57; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
58; CHECK-NEXT:    selp.u16 %rs1, -1, 0, %p1;
59; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs1;
60; CHECK-NEXT:    ret;
61  %val = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
62  %val2 = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
63  ret {i32, i1} %val
64}
65