; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_61 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_61 | FileCheck %s

; Verifies that every llvm.nvvm.idp4a.* and llvm.nvvm.idp2a.* intrinsic call
; is selected to a single dp4a / dp2a PTX instruction when targeting sm_61,
; using the same expectations for the nvptx and the nvptx64 triples.

target triple = "nvptx-nvidia-cuda"

; The two-letter suffix of each intrinsic picks the type modifiers printed on
; the instruction, in order (for example idp4a.u.s is expected to print as
; dp4a.u32.s32).
declare i32 @llvm.nvvm.idp4a.s.s(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.s.u(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.u.s(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.u.u(i32, i32, i32)

; dp4a, unsigned x unsigned, all operands taken from parameters.
define i32 @test_dp4a_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_u32_param_2];
; CHECK-NEXT:    dp4a.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp4a.u.u(i32 %a, i32 %b, i32 %c)
  ret i32 %res
}

; dp4a with constant-zero inputs: the expected PTX materializes the zero once
; with mov.b32 and feeds that same register to both input operands.
define i32 @test_dp4a_u32imm_u32imm(i32 %c) {
; CHECK-LABEL: test_dp4a_u32imm_u32imm(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32imm_u32imm_param_0];
; CHECK-NEXT:    mov.b32 %r2, 0;
; CHECK-NEXT:    dp4a.u32.u32 %r3, %r2, %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp4a.u.u(i32 0, i32 0, i32 %c)
  ret i32 %res
}

; dp4a, unsigned x signed.
define i32 @test_dp4a_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_s32_param_2];
; CHECK-NEXT:    dp4a.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp4a.u.s(i32 %a, i32 %b, i32 %c)
  ret i32 %res
}

; dp4a, signed x unsigned.
define i32 @test_dp4a_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_u32_param_2];
; CHECK-NEXT:    dp4a.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp4a.s.u(i32 %a, i32 %b, i32 %c)
  ret i32 %res
}

; dp4a, signed x signed.
define i32 @test_dp4a_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_s32_param_2];
; CHECK-NEXT:    dp4a.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp4a.s.s(i32 %a, i32 %b, i32 %c)
  ret i32 %res
}

; For the dp2a intrinsics the i1 immarg operand chooses the variant that is
; printed: false is expected to give dp2a.lo and true to give dp2a.hi, as the
; expectations in the functions below show.
declare i32 @llvm.nvvm.idp2a.s.s(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.s.u(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.u.s(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.u.u(i32, i32, i1 immarg, i32)

; dp2a.lo, unsigned x unsigned.
define i32 @test_dp2a_lo_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_u32_param_2];
; CHECK-NEXT:    dp2a.lo.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 false, i32 %c)
  ret i32 %res
}

; dp2a.lo, unsigned x signed.
define i32 @test_dp2a_lo_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_s32_param_2];
; CHECK-NEXT:    dp2a.lo.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 false, i32 %c)
  ret i32 %res
}

; dp2a.lo, signed x unsigned.
define i32 @test_dp2a_lo_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_u32_param_2];
; CHECK-NEXT:    dp2a.lo.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 false, i32 %c)
  ret i32 %res
}

; dp2a.lo, signed x signed.
define i32 @test_dp2a_lo_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_s32_param_2];
; CHECK-NEXT:    dp2a.lo.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 false, i32 %c)
  ret i32 %res
}

; dp2a.hi, unsigned x unsigned.
define i32 @test_dp2a_hi_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_u32_param_2];
; CHECK-NEXT:    dp2a.hi.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 true, i32 %c)
  ret i32 %res
}

; dp2a.hi, unsigned x signed.
define i32 @test_dp2a_hi_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_s32_param_2];
; CHECK-NEXT:    dp2a.hi.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 true, i32 %c)
  ret i32 %res
}

; dp2a.hi, signed x unsigned.
define i32 @test_dp2a_hi_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_u32_param_2];
; CHECK-NEXT:    dp2a.hi.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 true, i32 %c)
  ret i32 %res
}

; dp2a.hi, signed x signed.
define i32 @test_dp2a_hi_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_s32_param_2];
; CHECK-NEXT:    dp2a.hi.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %res = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 true, i32 %c)
  ret i32 %res
}