; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_61 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_61 | FileCheck %s

; Lowering tests for the llvm.nvvm.idp4a.* and llvm.nvvm.idp2a.* intrinsics.
; Both invocations above (32-bit and 64-bit triples, sm_61) share one set of
; expected-output lines.

target triple = "nvptx-nvidia-cuda"

; The four idp4a intrinsic variants exercised below, one per .s/.u suffix
; pair. Each takes three i32 operands and returns an i32.
declare i32 @llvm.nvvm.idp4a.s.s(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.s.u(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.u.s(i32, i32, i32)
declare i32 @llvm.nvvm.idp4a.u.u(i32, i32, i32)

; idp4a.u.u on three register operands: expects three parameter loads feeding
; a single dp4a.u32.u32 whose result is stored to the return slot.
define i32 @test_dp4a_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_u32_param_2];
; CHECK-NEXT:    dp4a.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.u.u(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; idp4a.u.u with constant 0 for the first two operands: expects the constant
; to be materialized once with mov.b32 and that one register to be used for
; both leading operands of dp4a.u32.u32.
define i32 @test_dp4a_u32imm_u32imm(i32 %c) {
; CHECK-LABEL: test_dp4a_u32imm_u32imm(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32imm_u32imm_param_0];
; CHECK-NEXT:    mov.b32 %r2, 0;
; CHECK-NEXT:    dp4a.u32.u32 %r3, %r2, %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.u.u(i32 0, i32 0, i32 %c)
  ret i32 %call
}

; idp4a.u.s on three register operands: expects a single dp4a.u32.s32, i.e.
; the intrinsic's suffix order is carried over to the instruction's type pair.
define i32 @test_dp4a_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_s32_param_2];
; CHECK-NEXT:    dp4a.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.u.s(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; idp4a.s.u on three register operands: expects a single dp4a.s32.u32.
define i32 @test_dp4a_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_u32_param_2];
; CHECK-NEXT:    dp4a.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.s.u(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; idp4a.s.s on three register operands: expects a single dp4a.s32.s32.
define i32 @test_dp4a_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp4a_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_s32_param_2];
; CHECK-NEXT:    dp4a.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp4a.s.s(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; The four idp2a intrinsic variants exercised below, one per .s/.u suffix
; pair. The third operand is an i1 immarg; the tests in this file pass 0 where
; they expect dp2a.lo and 1 where they expect dp2a.hi.
declare i32 @llvm.nvvm.idp2a.s.s(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.s.u(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.u.s(i32, i32, i1 immarg, i32)
declare i32 @llvm.nvvm.idp2a.u.u(i32, i32, i1 immarg, i32)

; idp2a.u.u with the i1 immediate set to 0: expects a single dp2a.lo.u32.u32.
define i32 @test_dp2a_lo_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_u32_param_2];
; CHECK-NEXT:    dp2a.lo.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.u.s with the i1 immediate set to 0: expects a single dp2a.lo.u32.s32.
define i32 @test_dp2a_lo_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_s32_param_2];
; CHECK-NEXT:    dp2a.lo.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.s.u with the i1 immediate set to 0: expects a single dp2a.lo.s32.u32.
define i32 @test_dp2a_lo_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_u32_param_2];
; CHECK-NEXT:    dp2a.lo.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.s.s with the i1 immediate set to 0: expects a single dp2a.lo.s32.s32.
define i32 @test_dp2a_lo_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_lo_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_s32_param_2];
; CHECK-NEXT:    dp2a.lo.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 0, i32 %c)
  ret i32 %call
}

; idp2a.u.u with the i1 immediate set to 1: expects a single dp2a.hi.u32.u32.
define i32 @test_dp2a_hi_u32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_u32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_u32_param_2];
; CHECK-NEXT:    dp2a.hi.u32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}

; idp2a.u.s with the i1 immediate set to 1: expects a single dp2a.hi.u32.s32.
define i32 @test_dp2a_hi_u32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_u32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_s32_param_2];
; CHECK-NEXT:    dp2a.hi.u32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}

; idp2a.s.u with the i1 immediate set to 1: expects a single dp2a.hi.s32.u32.
define i32 @test_dp2a_hi_s32_u32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_s32_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_u32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_u32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_u32_param_2];
; CHECK-NEXT:    dp2a.hi.s32.u32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}

; idp2a.s.s with the i1 immediate set to 1: expects a single dp2a.hi.s32.s32.
define i32 @test_dp2a_hi_s32_s32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_dp2a_hi_s32_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_s32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_s32_param_1];
; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_s32_param_2];
; CHECK-NEXT:    dp2a.hi.s32.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
  %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 1, i32 %c)
  ret i32 %call
}