; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_61 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_61 | FileCheck %s

; Codegen test for the NVVM integer dot-product intrinsics
; (llvm.nvvm.idp4a.* and llvm.nvvm.idp2a.*).
;
; Each function below calls exactly one intrinsic and returns its result. The
; expected PTX shows the intrinsic selected as a single dp4a / dp2a
; instruction whose type suffixes follow the intrinsic name:
;   .u.u -> .u32.u32    .u.s -> .u32.s32
;   .s.u -> .s32.u32    .s.s -> .s32.s32
; The same assertions are shared by both invocations above (nvptx and nvptx64).
;
; The assertion lines are machine-generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

target triple = "nvptx-nvidia-cuda"

;;; dp4a: operands are (i32 a, i32 b, i32 c).

declare i32 @llvm.nvvm.idp4a.s.s(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.s.u(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.u.s(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.u.u(i32, i32, i32)

; idp4a.u.u with three register operands -> dp4a.u32.u32.
define i32 @test_dp4a_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_u32_param_2];
; CHECK-NEXT:    dp4a.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.u.u(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; idp4a.u.u with constant zero for the first two operands. The expected output
; materializes the constant once with mov.b32 and passes that register for
; both operands; no immediate form of dp4a is expected here.
define i32 @test_dp4a_u32imm_u32imm(i32 %c) {
; CHECK-LABEL: test_dp4a_u32imm_u32imm(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32imm_u32imm_param_0];
; CHECK-NEXT:    mov.b32 %r2, 0;
; CHECK-NEXT:    dp4a.u32.u32 %r3, %r2, %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.u.u(i32 0, i32 0, i32 %c)
  ret i32 %call
}

; idp4a.u.s -> dp4a.u32.s32.
define i32 @test_dp4a_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_s32_param_2];
; CHECK-NEXT:    dp4a.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.u.s(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; idp4a.s.u -> dp4a.s32.u32.
define i32 @test_dp4a_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_u32_param_2];
; CHECK-NEXT:    dp4a.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.s.u(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; idp4a.s.s -> dp4a.s32.s32.
define i32 @test_dp4a_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_s32_param_2];
; CHECK-NEXT:    dp4a.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.s.s(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

;;; dp2a: operands are (i32 a, i32 b, i1 immarg, i32 c). In the expected
;;; output an immarg of 0 produces the ".lo" form and 1 produces ".hi".

declare i32 @llvm.nvvm.idp2a.s.s(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.s.u(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.u.s(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.u.u(i32, i32, i1 immarg, i32)

; idp2a.u.u, immarg 0 -> dp2a.lo.u32.u32.
define i32 @test_dp2a_lo_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_u32_param_2];
; CHECK-NEXT:    dp2a.lo.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.u.s, immarg 0 -> dp2a.lo.u32.s32.
define i32 @test_dp2a_lo_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_s32_param_2];
; CHECK-NEXT:    dp2a.lo.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.s.u, immarg 0 -> dp2a.lo.s32.u32.
define i32 @test_dp2a_lo_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_u32_param_2];
; CHECK-NEXT:    dp2a.lo.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.s.s, immarg 0 -> dp2a.lo.s32.s32.
define i32 @test_dp2a_lo_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_s32_param_2];
; CHECK-NEXT:    dp2a.lo.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.u.u, immarg 1 -> dp2a.hi.u32.u32.
define i32 @test_dp2a_hi_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_u32_param_2];
; CHECK-NEXT:    dp2a.hi.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}

; idp2a.u.s, immarg 1 -> dp2a.hi.u32.s32.
define i32 @test_dp2a_hi_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_s32_param_2];
; CHECK-NEXT:    dp2a.hi.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}

; idp2a.s.u, immarg 1 -> dp2a.hi.s32.u32.
define i32 @test_dp2a_hi_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_u32_param_2];
; CHECK-NEXT:    dp2a.hi.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}

; idp2a.s.s, immarg 1 -> dp2a.hi.s32.s32.
define i32 @test_dp2a_hi_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_s32_param_2];
; CHECK-NEXT:    dp2a.hi.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}