; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | FileCheck %s
; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 | %ptxas-verify -arch=sm_90 %}

target triple = "nvptx64-nvidia-cuda"

declare {i32, i1} @llvm.nvvm.elect.sync(i32)

; The mask comes from a function argument: it is loaded from the parameter
; space into a register and that register is used as the instruction operand.
; The {i32, i1} result is returned as a 32-bit store at offset 0 plus the
; predicate widened through selp and stored as a byte at offset 4.
define {i32, i1} @elect_sync(i32 %mask) {
; CHECK-LABEL: elect_sync(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [elect_sync_param_0];
; CHECK-NEXT:    elect.sync %r2|%p1, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    selp.u16 %rs1, -1, 0, %p1;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT:    ret;
  %elected = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
  ret {i32, i1} %elected
}

; The mask is the constant 0xffffffff: no parameter load is expected and the
; operand should be emitted directly as the immediate -1.
define {i32, i1} @elect_sync_imm() {
; CHECK-LABEL: elect_sync_imm(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    elect.sync %r1|%p1, -1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    selp.u16 %rs1, -1, 0, %p1;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT:    ret;
  %elected = call {i32, i1} @llvm.nvvm.elect.sync(i32 u0xffffffff)
  ret {i32, i1} %elected
}

; When there are two elect.sync calls, make sure that
; the second one is not optimized away.
define {i32, i1} @elect_sync_twice(i32 %mask) {
; CHECK-LABEL: elect_sync_twice(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [elect_sync_twice_param_0];
; CHECK-NEXT:    elect.sync %r2|%p1, %r1;
; CHECK-NEXT:    elect.sync %r3|%p2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    selp.u16 %rs1, -1, 0, %p1;
; CHECK-NEXT:    st.param.b8 [func_retval0+4], %rs1;
; CHECK-NEXT:    ret;
  ; Both calls take the same mask; only the first result is returned.
  %first = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
  ; The second result is deliberately left unused: the expected output above
  ; still contains a second elect.sync, i.e. the call must not be merged with
  ; the first one or deleted as dead code.
  %second = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask)
  ret {i32, i1} %first
}