; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 7 x float> @llvm.vp.fptrunc.nxv7f64.nxv7f32(<vscale x 7 x double>, <vscale x 7 x i1>, i32)

define <vscale x 7 x float> @vfptrunc_nxv7f32_nxv7f64(<vscale x 7 x double> %a, <vscale x 7 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv7f32_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x float> @llvm.vp.fptrunc.nxv7f64.nxv7f32(<vscale x 7 x double> %a, <vscale x 7 x i1> %m, i32 %vl)
  ret <vscale x 7 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a2, a1, 3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v20, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB7_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB7_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 %vl)
  ret <vscale x 16 x float> %v
}

declare <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f64.nxv32f32(<vscale x 32 x double>, <vscale x 32 x i1>, i32)

define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double> %a, <vscale x 32 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv32f32_nxv32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a3, a1, 3
; CHECK-NEXT:    srli a5, a1, 2
; CHECK-NEXT:    slli a6, a1, 3
; CHECK-NEXT:    slli a4, a1, 1
; CHECK-NEXT:    vslidedown.vx v16, v0, a5
; CHECK-NEXT:    add a6, a0, a6
; CHECK-NEXT:    sub a5, a2, a4
; CHECK-NEXT:    vl8re64.v v24, (a6)
; CHECK-NEXT:    sltu a6, a2, a5
; CHECK-NEXT:    addi a6, a6, -1
; CHECK-NEXT:    and a5, a6, a5
; CHECK-NEXT:    sub a6, a5, a1
; CHECK-NEXT:    sltu a7, a5, a6
; CHECK-NEXT:    addi a7, a7, -1
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v16, a3
; CHECK-NEXT:    and a0, a7, a6
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v20, v24, v0.t
; CHECK-NEXT:    bltu a5, a1, .LBB8_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a5, a1
; CHECK-NEXT:  .LBB8_2:
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vx v6, v7, a3
; CHECK-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT:    bltu a2, a4, .LBB8_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    mv a2, a4
; CHECK-NEXT:  .LBB8_4:
; CHECK-NEXT:    sub a0, a2, a1
; CHECK-NEXT:    sltu a3, a2, a0
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a0, a3, a0
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v28, v8, v0.t
; CHECK-NEXT:    bltu a2, a1, .LBB8_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:  .LBB8_6:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v24, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f64.nxv32f32(<vscale x 32 x double> %a, <vscale x 32 x i1> %m, i32 %vl)
  ret <vscale x 32 x float> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x bfloat> %v
}