xref: /llvm-project/llvm/test/CodeGen/NVPTX/dot-product.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx -mcpu=sm_61 | FileCheck %s
3; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_61 | FileCheck %s
4
; Module-level triple. Both RUN lines above additionally pass an explicit
; -mtriple (nvptx and nvptx64) on the llc command line.
5target triple = "nvptx-nvidia-cuda"
6
; Declarations of the four idp4a intrinsic variants exercised by the
; test_dp4a_* functions. Each takes three i32 operands and returns i32.
; In the tests, the two-letter suffix lines up with the operand types of the
; expected instruction (e.g. the .u.s variant is expected to produce
; dp4a.u32.s32).
7declare i32 @llvm.nvvm.idp4a.s.s(i32, i32, i32)
8declare i32 @llvm.nvvm.idp4a.s.u(i32, i32, i32)
9declare i32 @llvm.nvvm.idp4a.u.s(i32, i32, i32)
10declare i32 @llvm.nvvm.idp4a.u.u(i32, i32, i32)
11
; Calls the idp4a.u.u intrinsic on the three function arguments. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp4a.u32.u32, and stores the result to func_retval0.
12define i32 @test_dp4a_u32_u32(i32 %a, i32 %b, i32 %c) {
13; CHECK-LABEL: test_dp4a_u32_u32(
14; CHECK:       {
15; CHECK-NEXT:    .reg .b32 %r<5>;
16; CHECK-EMPTY:
17; CHECK-NEXT:  // %bb.0:
18; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_u32_param_0];
19; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_u32_param_1];
20; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_u32_param_2];
21; CHECK-NEXT:    dp4a.u32.u32 %r4, %r1, %r2, %r3;
22; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
23; CHECK-NEXT:    ret;
24  %call = call i32 @llvm.nvvm.idp4a.u.u(i32 %a, i32 %b, i32 %c)
25  ret i32 %call
26}
27
; Calls the idp4a.u.u intrinsic with the constant 0 as both the first and the
; second operand and the function argument %c as the third. The expected PTX
; moves 0 into one register (%r2), uses that register for both of the first
; two dp4a.u32.u32 source operands, and passes the loaded parameter last.
28define i32 @test_dp4a_u32imm_u32imm(i32 %c) {
29; CHECK-LABEL: test_dp4a_u32imm_u32imm(
30; CHECK:       {
31; CHECK-NEXT:    .reg .b32 %r<4>;
32; CHECK-EMPTY:
33; CHECK-NEXT:  // %bb.0:
34; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32imm_u32imm_param_0];
35; CHECK-NEXT:    mov.b32 %r2, 0;
36; CHECK-NEXT:    dp4a.u32.u32 %r3, %r2, %r2, %r1;
37; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
38; CHECK-NEXT:    ret;
39  %call = call i32 @llvm.nvvm.idp4a.u.u(i32 0, i32 0, i32 %c)
40  ret i32 %call
41}
42
; Calls the idp4a.u.s intrinsic on the three function arguments. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp4a.u32.s32, and stores the result to func_retval0.
43define i32 @test_dp4a_u32_s32(i32 %a, i32 %b, i32 %c) {
44; CHECK-LABEL: test_dp4a_u32_s32(
45; CHECK:       {
46; CHECK-NEXT:    .reg .b32 %r<5>;
47; CHECK-EMPTY:
48; CHECK-NEXT:  // %bb.0:
49; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_u32_s32_param_0];
50; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_u32_s32_param_1];
51; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_u32_s32_param_2];
52; CHECK-NEXT:    dp4a.u32.s32 %r4, %r1, %r2, %r3;
53; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
54; CHECK-NEXT:    ret;
55  %call = call i32 @llvm.nvvm.idp4a.u.s(i32 %a, i32 %b, i32 %c)
56  ret i32 %call
57}
58
; Calls the idp4a.s.u intrinsic on the three function arguments. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp4a.s32.u32, and stores the result to func_retval0.
59define i32 @test_dp4a_s32_u32(i32 %a, i32 %b, i32 %c) {
60; CHECK-LABEL: test_dp4a_s32_u32(
61; CHECK:       {
62; CHECK-NEXT:    .reg .b32 %r<5>;
63; CHECK-EMPTY:
64; CHECK-NEXT:  // %bb.0:
65; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_u32_param_0];
66; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_u32_param_1];
67; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_u32_param_2];
68; CHECK-NEXT:    dp4a.s32.u32 %r4, %r1, %r2, %r3;
69; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
70; CHECK-NEXT:    ret;
71  %call = call i32 @llvm.nvvm.idp4a.s.u(i32 %a, i32 %b, i32 %c)
72  ret i32 %call
73}
74
; Calls the idp4a.s.s intrinsic on the three function arguments. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp4a.s32.s32, and stores the result to func_retval0.
75define i32 @test_dp4a_s32_s32(i32 %a, i32 %b, i32 %c) {
76; CHECK-LABEL: test_dp4a_s32_s32(
77; CHECK:       {
78; CHECK-NEXT:    .reg .b32 %r<5>;
79; CHECK-EMPTY:
80; CHECK-NEXT:  // %bb.0:
81; CHECK-NEXT:    ld.param.u32 %r1, [test_dp4a_s32_s32_param_0];
82; CHECK-NEXT:    ld.param.u32 %r2, [test_dp4a_s32_s32_param_1];
83; CHECK-NEXT:    ld.param.u32 %r3, [test_dp4a_s32_s32_param_2];
84; CHECK-NEXT:    dp4a.s32.s32 %r4, %r1, %r2, %r3;
85; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
86; CHECK-NEXT:    ret;
87  %call = call i32 @llvm.nvvm.idp4a.s.s(i32 %a, i32 %b, i32 %c)
88  ret i32 %call
89}
90
; Declarations of the four idp2a intrinsic variants exercised by the
; test_dp2a_* functions. Compared with the idp4a declarations they take an
; extra i1 immarg as the third argument: the tests pass 0 where a dp2a.lo
; instruction is expected and 1 where a dp2a.hi instruction is expected.
91declare i32 @llvm.nvvm.idp2a.s.s(i32, i32, i1 immarg, i32)
92declare i32 @llvm.nvvm.idp2a.s.u(i32, i32, i1 immarg, i32)
93declare i32 @llvm.nvvm.idp2a.u.s(i32, i32, i1 immarg, i32)
94declare i32 @llvm.nvvm.idp2a.u.u(i32, i32, i1 immarg, i32)
95
; Calls the idp2a.u.u intrinsic with the i1 immediate set to 0. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.lo.u32.u32, and stores the result to func_retval0.
96define i32 @test_dp2a_lo_u32_u32(i32 %a, i32 %b, i32 %c) {
97; CHECK-LABEL: test_dp2a_lo_u32_u32(
98; CHECK:       {
99; CHECK-NEXT:    .reg .b32 %r<5>;
100; CHECK-EMPTY:
101; CHECK-NEXT:  // %bb.0:
102; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_u32_param_0];
103; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_u32_param_1];
104; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_u32_param_2];
105; CHECK-NEXT:    dp2a.lo.u32.u32 %r4, %r1, %r2, %r3;
106; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
107; CHECK-NEXT:    ret;
108  %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 0, i32 %c)
109  ret i32 %call
110}
111
; Calls the idp2a.u.s intrinsic with the i1 immediate set to 0. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.lo.u32.s32, and stores the result to func_retval0.
112define i32 @test_dp2a_lo_u32_s32(i32 %a, i32 %b, i32 %c) {
113; CHECK-LABEL: test_dp2a_lo_u32_s32(
114; CHECK:       {
115; CHECK-NEXT:    .reg .b32 %r<5>;
116; CHECK-EMPTY:
117; CHECK-NEXT:  // %bb.0:
118; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_u32_s32_param_0];
119; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_u32_s32_param_1];
120; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_u32_s32_param_2];
121; CHECK-NEXT:    dp2a.lo.u32.s32 %r4, %r1, %r2, %r3;
122; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
123; CHECK-NEXT:    ret;
124  %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 0, i32 %c)
125  ret i32 %call
126}
127
; Calls the idp2a.s.u intrinsic with the i1 immediate set to 0. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.lo.s32.u32, and stores the result to func_retval0.
128define i32 @test_dp2a_lo_s32_u32(i32 %a, i32 %b, i32 %c) {
129; CHECK-LABEL: test_dp2a_lo_s32_u32(
130; CHECK:       {
131; CHECK-NEXT:    .reg .b32 %r<5>;
132; CHECK-EMPTY:
133; CHECK-NEXT:  // %bb.0:
134; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_u32_param_0];
135; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_u32_param_1];
136; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_u32_param_2];
137; CHECK-NEXT:    dp2a.lo.s32.u32 %r4, %r1, %r2, %r3;
138; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
139; CHECK-NEXT:    ret;
140  %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 0, i32 %c)
141  ret i32 %call
142}
143
; Calls the idp2a.s.s intrinsic with the i1 immediate set to 0. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.lo.s32.s32, and stores the result to func_retval0.
144define i32 @test_dp2a_lo_s32_s32(i32 %a, i32 %b, i32 %c) {
145; CHECK-LABEL: test_dp2a_lo_s32_s32(
146; CHECK:       {
147; CHECK-NEXT:    .reg .b32 %r<5>;
148; CHECK-EMPTY:
149; CHECK-NEXT:  // %bb.0:
150; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_lo_s32_s32_param_0];
151; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_lo_s32_s32_param_1];
152; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_lo_s32_s32_param_2];
153; CHECK-NEXT:    dp2a.lo.s32.s32 %r4, %r1, %r2, %r3;
154; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
155; CHECK-NEXT:    ret;
156  %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 0, i32 %c)
157  ret i32 %call
158}
159
; Calls the idp2a.u.u intrinsic with the i1 immediate set to 1. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.hi.u32.u32, and stores the result to func_retval0.
160define i32 @test_dp2a_hi_u32_u32(i32 %a, i32 %b, i32 %c) {
161; CHECK-LABEL: test_dp2a_hi_u32_u32(
162; CHECK:       {
163; CHECK-NEXT:    .reg .b32 %r<5>;
164; CHECK-EMPTY:
165; CHECK-NEXT:  // %bb.0:
166; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_u32_param_0];
167; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_u32_param_1];
168; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_u32_param_2];
169; CHECK-NEXT:    dp2a.hi.u32.u32 %r4, %r1, %r2, %r3;
170; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
171; CHECK-NEXT:    ret;
172  %call = call i32 @llvm.nvvm.idp2a.u.u(i32 %a, i32 %b, i1 1, i32 %c)
173  ret i32 %call
174}
175
; Calls the idp2a.u.s intrinsic with the i1 immediate set to 1. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.hi.u32.s32, and stores the result to func_retval0.
176define i32 @test_dp2a_hi_u32_s32(i32 %a, i32 %b, i32 %c) {
177; CHECK-LABEL: test_dp2a_hi_u32_s32(
178; CHECK:       {
179; CHECK-NEXT:    .reg .b32 %r<5>;
180; CHECK-EMPTY:
181; CHECK-NEXT:  // %bb.0:
182; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_u32_s32_param_0];
183; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_u32_s32_param_1];
184; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_u32_s32_param_2];
185; CHECK-NEXT:    dp2a.hi.u32.s32 %r4, %r1, %r2, %r3;
186; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
187; CHECK-NEXT:    ret;
188  %call = call i32 @llvm.nvvm.idp2a.u.s(i32 %a, i32 %b, i1 1, i32 %c)
189  ret i32 %call
190}
191
; Calls the idp2a.s.u intrinsic with the i1 immediate set to 1. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.hi.s32.u32, and stores the result to func_retval0.
192define i32 @test_dp2a_hi_s32_u32(i32 %a, i32 %b, i32 %c) {
193; CHECK-LABEL: test_dp2a_hi_s32_u32(
194; CHECK:       {
195; CHECK-NEXT:    .reg .b32 %r<5>;
196; CHECK-EMPTY:
197; CHECK-NEXT:  // %bb.0:
198; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_u32_param_0];
199; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_u32_param_1];
200; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_u32_param_2];
201; CHECK-NEXT:    dp2a.hi.s32.u32 %r4, %r1, %r2, %r3;
202; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
203; CHECK-NEXT:    ret;
204  %call = call i32 @llvm.nvvm.idp2a.s.u(i32 %a, i32 %b, i1 1, i32 %c)
205  ret i32 %call
206}
207
; Calls the idp2a.s.s intrinsic with the i1 immediate set to 1. The expected
; PTX loads the three parameters into %r1-%r3, feeds them in that order to a
; single dp2a.hi.s32.s32, and stores the result to func_retval0.
208define i32 @test_dp2a_hi_s32_s32(i32 %a, i32 %b, i32 %c) {
209; CHECK-LABEL: test_dp2a_hi_s32_s32(
210; CHECK:       {
211; CHECK-NEXT:    .reg .b32 %r<5>;
212; CHECK-EMPTY:
213; CHECK-NEXT:  // %bb.0:
214; CHECK-NEXT:    ld.param.u32 %r1, [test_dp2a_hi_s32_s32_param_0];
215; CHECK-NEXT:    ld.param.u32 %r2, [test_dp2a_hi_s32_s32_param_1];
216; CHECK-NEXT:    ld.param.u32 %r3, [test_dp2a_hi_s32_s32_param_2];
217; CHECK-NEXT:    dp2a.hi.s32.s32 %r4, %r1, %r2, %r3;
218; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
219; CHECK-NEXT:    ret;
220  %call = call i32 @llvm.nvvm.idp2a.s.s(i32 %a, i32 %b, i1 1, i32 %c)
221  ret i32 %call
222}
223