xref: /llvm-project/llvm/test/CodeGen/NVPTX/convert-sm80.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
3; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
4
5
; Packs two f32 arguments into a <2 x bfloat> through the
; @llvm.nvvm.ff2bf16x2.rn intrinsic. The assertions expect both parameters to
; be loaded and passed, in argument order, to a single cvt.rn.bf16x2.f32 whose
; 32-bit result is stored to func_retval0.
6define <2 x bfloat> @cvt_rn_bf16x2_f32(float %f1, float %f2) {
7; CHECK-LABEL: cvt_rn_bf16x2_f32(
8; CHECK:       {
9; CHECK-NEXT:    .reg .b32 %r<2>;
10; CHECK-NEXT:    .reg .f32 %f<3>;
11; CHECK-EMPTY:
12; CHECK-NEXT:  // %bb.0:
13; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_bf16x2_f32_param_0];
14; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_bf16x2_f32_param_1];
15; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r1, %f1, %f2;
16; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
17; CHECK-NEXT:    ret;
18  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float %f1, float %f2)
19  ret <2 x bfloat> %val
20}
21
; Same shape as the plain .rn bf16x2 test, but calls the
; @llvm.nvvm.ff2bf16x2.rn.relu intrinsic; the expected instruction carries the
; additional .relu modifier (cvt.rn.relu.bf16x2.f32).
22define <2 x bfloat> @cvt_rn_relu_bf16x2_f32(float %f1, float %f2) {
23; CHECK-LABEL: cvt_rn_relu_bf16x2_f32(
24; CHECK:       {
25; CHECK-NEXT:    .reg .b32 %r<2>;
26; CHECK-NEXT:    .reg .f32 %f<3>;
27; CHECK-EMPTY:
28; CHECK-NEXT:  // %bb.0:
29; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_relu_bf16x2_f32_param_0];
30; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_relu_bf16x2_f32_param_1];
31; CHECK-NEXT:    cvt.rn.relu.bf16x2.f32 %r1, %f1, %f2;
32; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
33; CHECK-NEXT:    ret;
34  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float %f1, float %f2)
35  ret <2 x bfloat> %val
36}
37
; Packed f32 -> <2 x bfloat> conversion through @llvm.nvvm.ff2bf16x2.rz.
; The assertions expect one cvt.rz.bf16x2.f32 consuming both loaded
; parameters in argument order, followed by a b32 store of the result.
38define <2 x bfloat> @cvt_rz_bf16x2_f32(float %f1, float %f2) {
39; CHECK-LABEL: cvt_rz_bf16x2_f32(
40; CHECK:       {
41; CHECK-NEXT:    .reg .b32 %r<2>;
42; CHECK-NEXT:    .reg .f32 %f<3>;
43; CHECK-EMPTY:
44; CHECK-NEXT:  // %bb.0:
45; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_bf16x2_f32_param_0];
46; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_bf16x2_f32_param_1];
47; CHECK-NEXT:    cvt.rz.bf16x2.f32 %r1, %f1, %f2;
48; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
49; CHECK-NEXT:    ret;
50  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float %f1, float %f2)
51  ret <2 x bfloat> %val
52}
53
; .rz counterpart of the relu bf16x2 test: calls
; @llvm.nvvm.ff2bf16x2.rz.relu and expects cvt.rz.relu.bf16x2.f32 to be
; emitted with the two parameters in argument order.
54define <2 x bfloat> @cvt_rz_relu_bf16x2_f32(float %f1, float %f2) {
55; CHECK-LABEL: cvt_rz_relu_bf16x2_f32(
56; CHECK:       {
57; CHECK-NEXT:    .reg .b32 %r<2>;
58; CHECK-NEXT:    .reg .f32 %f<3>;
59; CHECK-EMPTY:
60; CHECK-NEXT:  // %bb.0:
61; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_relu_bf16x2_f32_param_0];
62; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_relu_bf16x2_f32_param_1];
63; CHECK-NEXT:    cvt.rz.relu.bf16x2.f32 %r1, %f1, %f2;
64; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
65; CHECK-NEXT:    ret;
66  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float %f1, float %f2)
67  ret <2 x bfloat> %val
68}
69
; Declarations of the packed f32 -> <2 x bfloat> intrinsics called by the four
; bf16x2 tests above (.rn and .rz variants, each with and without .relu).
70declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float, float)
71declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float, float)
72declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float, float)
73declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float, float)
74
; Packs two f32 arguments into a <2 x half> through the
; @llvm.nvvm.ff2f16x2.rn intrinsic. The assertions expect a single
; cvt.rn.f16x2.f32 fed by both parameters in argument order, with the 32-bit
; result stored to func_retval0.
75define <2 x half> @cvt_rn_f16x2_f32(float %f1, float %f2) {
76; CHECK-LABEL: cvt_rn_f16x2_f32(
77; CHECK:       {
78; CHECK-NEXT:    .reg .b32 %r<2>;
79; CHECK-NEXT:    .reg .f32 %f<3>;
80; CHECK-EMPTY:
81; CHECK-NEXT:  // %bb.0:
82; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_f16x2_f32_param_0];
83; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_f16x2_f32_param_1];
84; CHECK-NEXT:    cvt.rn.f16x2.f32 %r1, %f1, %f2;
85; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
86; CHECK-NEXT:    ret;
87  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn(float %f1, float %f2)
88  ret <2 x half> %val
89}
90
; f16x2 test for the .relu form of .rn: calls
; @llvm.nvvm.ff2f16x2.rn.relu and expects cvt.rn.relu.f16x2.f32.
91define <2 x half> @cvt_rn_relu_f16x2_f32(float %f1, float %f2) {
92; CHECK-LABEL: cvt_rn_relu_f16x2_f32(
93; CHECK:       {
94; CHECK-NEXT:    .reg .b32 %r<2>;
95; CHECK-NEXT:    .reg .f32 %f<3>;
96; CHECK-EMPTY:
97; CHECK-NEXT:  // %bb.0:
98; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_relu_f16x2_f32_param_0];
99; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_relu_f16x2_f32_param_1];
100; CHECK-NEXT:    cvt.rn.relu.f16x2.f32 %r1, %f1, %f2;
101; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
102; CHECK-NEXT:    ret;
103  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float %f1, float %f2)
104  ret <2 x half> %val
105}
106
; Packed f32 -> <2 x half> conversion through @llvm.nvvm.ff2f16x2.rz; the
; assertions expect one cvt.rz.f16x2.f32 and a b32 store of its result.
107define <2 x half> @cvt_rz_f16x2_f32(float %f1, float %f2) {
108; CHECK-LABEL: cvt_rz_f16x2_f32(
109; CHECK:       {
110; CHECK-NEXT:    .reg .b32 %r<2>;
111; CHECK-NEXT:    .reg .f32 %f<3>;
112; CHECK-EMPTY:
113; CHECK-NEXT:  // %bb.0:
114; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_f16x2_f32_param_0];
115; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_f16x2_f32_param_1];
116; CHECK-NEXT:    cvt.rz.f16x2.f32 %r1, %f1, %f2;
117; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
118; CHECK-NEXT:    ret;
119  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rz(float %f1, float %f2)
120  ret <2 x half> %val
121}
122
; f16x2 test for the .relu form of .rz: calls
; @llvm.nvvm.ff2f16x2.rz.relu and expects cvt.rz.relu.f16x2.f32.
123define <2 x half> @cvt_rz_relu_f16x2_f32(float %f1, float %f2) {
124; CHECK-LABEL: cvt_rz_relu_f16x2_f32(
125; CHECK:       {
126; CHECK-NEXT:    .reg .b32 %r<2>;
127; CHECK-NEXT:    .reg .f32 %f<3>;
128; CHECK-EMPTY:
129; CHECK-NEXT:  // %bb.0:
130; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_relu_f16x2_f32_param_0];
131; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_relu_f16x2_f32_param_1];
132; CHECK-NEXT:    cvt.rz.relu.f16x2.f32 %r1, %f1, %f2;
133; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
134; CHECK-NEXT:    ret;
135  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float %f1, float %f2)
136  ret <2 x half> %val
137}
138
; Declarations of the packed f32 -> <2 x half> intrinsics called by the four
; f16x2 tests above (.rn and .rz variants, each with and without .relu).
139declare <2 x half> @llvm.nvvm.ff2f16x2.rn(float, float)
140declare <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float, float)
141declare <2 x half> @llvm.nvvm.ff2f16x2.rz(float, float)
142declare <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float, float)
143
; Scalar f32 -> bfloat conversion through @llvm.nvvm.f2bf16.rn. The
; assertions expect the single parameter to be converted by cvt.rn.bf16.f32
; into a 16-bit register, which is stored to func_retval0 with st.param.b16.
144define bfloat @cvt_rn_bf16_f32(float %f1) {
145; CHECK-LABEL: cvt_rn_bf16_f32(
146; CHECK:       {
147; CHECK-NEXT:    .reg .b16 %rs<2>;
148; CHECK-NEXT:    .reg .f32 %f<2>;
149; CHECK-EMPTY:
150; CHECK-NEXT:  // %bb.0:
151; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_bf16_f32_param_0];
152; CHECK-NEXT:    cvt.rn.bf16.f32 %rs1, %f1;
153; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
154; CHECK-NEXT:    ret;
155  %val = call bfloat @llvm.nvvm.f2bf16.rn(float %f1)
156  ret bfloat %val
157}
158
; Scalar bf16 test for the .relu form of .rn: calls
; @llvm.nvvm.f2bf16.rn.relu and expects cvt.rn.relu.bf16.f32.
159define bfloat @cvt_rn_relu_bf16_f32(float %f1) {
160; CHECK-LABEL: cvt_rn_relu_bf16_f32(
161; CHECK:       {
162; CHECK-NEXT:    .reg .b16 %rs<2>;
163; CHECK-NEXT:    .reg .f32 %f<2>;
164; CHECK-EMPTY:
165; CHECK-NEXT:  // %bb.0:
166; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_relu_bf16_f32_param_0];
167; CHECK-NEXT:    cvt.rn.relu.bf16.f32 %rs1, %f1;
168; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
169; CHECK-NEXT:    ret;
170  %val = call bfloat @llvm.nvvm.f2bf16.rn.relu(float %f1)
171  ret bfloat %val
172}
173
; Scalar f32 -> bfloat conversion through @llvm.nvvm.f2bf16.rz; the
; assertions expect cvt.rz.bf16.f32 followed by a b16 store of the result.
174define bfloat @cvt_rz_bf16_f32(float %f1) {
175; CHECK-LABEL: cvt_rz_bf16_f32(
176; CHECK:       {
177; CHECK-NEXT:    .reg .b16 %rs<2>;
178; CHECK-NEXT:    .reg .f32 %f<2>;
179; CHECK-EMPTY:
180; CHECK-NEXT:  // %bb.0:
181; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_bf16_f32_param_0];
182; CHECK-NEXT:    cvt.rz.bf16.f32 %rs1, %f1;
183; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
184; CHECK-NEXT:    ret;
185  %val = call bfloat @llvm.nvvm.f2bf16.rz(float %f1)
186  ret bfloat %val
187}
188
; Scalar bf16 test for the .relu form of .rz: calls
; @llvm.nvvm.f2bf16.rz.relu and expects cvt.rz.relu.bf16.f32.
189define bfloat @cvt_rz_relu_bf16_f32(float %f1) {
190; CHECK-LABEL: cvt_rz_relu_bf16_f32(
191; CHECK:       {
192; CHECK-NEXT:    .reg .b16 %rs<2>;
193; CHECK-NEXT:    .reg .f32 %f<2>;
194; CHECK-EMPTY:
195; CHECK-NEXT:  // %bb.0:
196; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_relu_bf16_f32_param_0];
197; CHECK-NEXT:    cvt.rz.relu.bf16.f32 %rs1, %f1;
198; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
199; CHECK-NEXT:    ret;
200  %val = call bfloat @llvm.nvvm.f2bf16.rz.relu(float %f1)
201  ret bfloat %val
202}
203
; Declarations of the scalar f32 -> bfloat intrinsics called by the four bf16
; tests above (.rn and .rz variants, each with and without .relu).
204declare bfloat @llvm.nvvm.f2bf16.rn(float)
205declare bfloat @llvm.nvvm.f2bf16.rn.relu(float)
206declare bfloat @llvm.nvvm.f2bf16.rz(float)
207declare bfloat @llvm.nvvm.f2bf16.rz.relu(float)
208
; f32 -> tf32 conversion through @llvm.nvvm.f2tf32.rna, which returns its
; result as an i32. The assertions expect cvt.rna.tf32.f32 writing a 32-bit
; register that is stored to func_retval0 with st.param.b32.
209define i32 @cvt_rna_tf32_f32(float %f1) {
210; CHECK-LABEL: cvt_rna_tf32_f32(
211; CHECK:       {
212; CHECK-NEXT:    .reg .b32 %r<2>;
213; CHECK-NEXT:    .reg .f32 %f<2>;
214; CHECK-EMPTY:
215; CHECK-NEXT:  // %bb.0:
216; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rna_tf32_f32_param_0];
217; CHECK-NEXT:    cvt.rna.tf32.f32 %r1, %f1;
218; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
219; CHECK-NEXT:    ret;
220  %val = call i32 @llvm.nvvm.f2tf32.rna(float %f1)
221  ret i32 %val
222}
223
; Declaration of the f32 -> tf32 intrinsic called by cvt_rna_tf32_f32 above.
224declare i32 @llvm.nvvm.f2tf32.rna(float)
225
226
; No intrinsic call here: two scalar fptrunc results are inserted into lanes 0
; and 1 of a <2 x bfloat>. The assertions expect this pattern to be emitted as
; one packed cvt.rn.bf16x2.f32 rather than two scalar conversions.
; Note the expected source order is %f2 (param 1, %hi) then %f1 (param 0, %lo),
; i.e. swapped relative to the IR argument order.
; NOTE(review): presumably the instruction's first source fills the upper half
; of the packed result -- confirm against the PTX ISA description of cvt.
227define <2 x bfloat> @fold_ff2bf16x2(float %lo, float %hi) {
228; CHECK-LABEL: fold_ff2bf16x2(
229; CHECK:       {
230; CHECK-NEXT:    .reg .b32 %r<2>;
231; CHECK-NEXT:    .reg .f32 %f<3>;
232; CHECK-EMPTY:
233; CHECK-NEXT:  // %bb.0:
234; CHECK-NEXT:    ld.param.f32 %f1, [fold_ff2bf16x2_param_0];
235; CHECK-NEXT:    ld.param.f32 %f2, [fold_ff2bf16x2_param_1];
236; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r1, %f2, %f1;
237; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
238; CHECK-NEXT:    ret;
239  %loh = fptrunc float %lo to bfloat
240  %hih = fptrunc float %hi to bfloat
241  %v0 = insertelement <2 x bfloat> poison, bfloat %loh, i64 0
242  %v1 = insertelement <2 x bfloat> %v0, bfloat %hih, i64 1
243  ret <2 x bfloat> %v1
244}
245
; Half-precision variant of the fold test: two scalar fptrunc results are
; inserted into lanes 0 and 1 of a <2 x half>, and the assertions expect a
; single packed cvt.rn.f16x2.f32 instead of two scalar conversions.
; As in the bfloat variant, the expected source order is %f2 (param 1, %hi)
; then %f1 (param 0, %lo), swapped relative to the IR argument order.
; NOTE(review): presumably the instruction's first source fills the upper half
; of the packed result -- confirm against the PTX ISA description of cvt.
246define <2 x half> @fold_ff2f16x2(float %lo, float %hi) {
247; CHECK-LABEL: fold_ff2f16x2(
248; CHECK:       {
249; CHECK-NEXT:    .reg .b32 %r<2>;
250; CHECK-NEXT:    .reg .f32 %f<3>;
251; CHECK-EMPTY:
252; CHECK-NEXT:  // %bb.0:
253; CHECK-NEXT:    ld.param.f32 %f1, [fold_ff2f16x2_param_0];
254; CHECK-NEXT:    ld.param.f32 %f2, [fold_ff2f16x2_param_1];
255; CHECK-NEXT:    cvt.rn.f16x2.f32 %r1, %f2, %f1;
256; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
257; CHECK-NEXT:    ret;
258  %loh = fptrunc float %lo to half
259  %hih = fptrunc float %hi to half
260  %v0 = insertelement <2 x half> poison, half %loh, i64 0
261  %v1 = insertelement <2 x half> %v0, half %hih, i64 1
262  ret <2 x half> %v1
263}
264