xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/vp-splice-fixed-vectors.ll (revision d8d131dfa99762ccdd2116661980b7d0493cd7b5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
3; RUN:   < %s | FileCheck %s
4
; Declarations of the vp.splice intrinsic overloads exercised by this file:
; four integer element types (i64, i32, i16, i8) and two floating-point
; element types (double, float), all 128 bits wide in total.
; As used by the calls in this file, the operands are, in order:
;   two source vectors, an i32 offset (always a constant at the call sites),
;   an i1 mask vector with one lane per element, and two i32 length operands
;   (passed as %evla and %evlb by the tests).
5declare <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64>, <2 x i64>, i32, <2 x i1>, i32, i32)
6declare <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32>, <4 x i32>, i32, <4 x i1>, i32, i32)
7declare <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16>, <8 x i16>, i32, <8 x i1>, i32, i32)
8declare <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8>, <16 x i8>, i32, <16 x i1>, i32, i32)
9
10declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double>, <2 x double>, i32, <2 x i1>, i32, i32)
11declare <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float>, <4 x float>, i32, <4 x i1>, i32, i32)
12
; Unmasked vp.splice of two <2 x i64> vectors with a positive constant offset.
; The call passes offset 5, an all-true mask (splat (i1 1)) and the runtime
; lengths %evla / %evlb. The autogenerated checks expect:
;   - a0 decremented by 5, then used as the VL for `vslidedown.vi v8, v8, 5`;
;   - a1 used as the VL for `vslideup.vx v8, v9, a0` (slide amount is the
;     decremented a0), both at SEW=e64, LMUL=m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb and v8/v9 carry %va/%vb
; per the calling convention - confirm. The offset (5) is larger than the
; 2 lanes of the fixed vector type.
; The CHECK lines are generated by update_llc_test_checks.py; regenerate
; rather than hand-edit them.
13define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
14; CHECK-LABEL: test_vp_splice_v2i64:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    addi a0, a0, -5
17; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
18; CHECK-NEXT:    vslidedown.vi v8, v8, 5
19; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
20; CHECK-NEXT:    vslideup.vx v8, v9, a0
21; CHECK-NEXT:    ret
22
23  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
24  ret <2 x i64> %v
25}
26
; Unmasked vp.splice of two <2 x i64> vectors with a negative constant
; offset (-5) and an all-true mask. The autogenerated checks expect:
;   - a0 decremented by 5 and used as the register slide amount of
;     `vslidedown.vx v8, v8, a0`, executed with VL set to the immediate 5
;     (`vsetivli zero, 5`);
;   - a1 used as the VL for `vslideup.vi v8, v9, 5`, both at SEW=e64, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
27define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
28; CHECK-LABEL: test_vp_splice_v2i64_negative_offset:
29; CHECK:       # %bb.0:
30; CHECK-NEXT:    addi a0, a0, -5
31; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
32; CHECK-NEXT:    vslidedown.vx v8, v8, a0
33; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
34; CHECK-NEXT:    vslideup.vi v8, v9, 5
35; CHECK-NEXT:    ret
36
37  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
38  ret <2 x i64> %v
39}
40
; Masked vp.splice of two <2 x i64> vectors: offset 5 with a caller-supplied
; <2 x i1> %mask instead of an all-true mask. The autogenerated checks expect
; both slide instructions to be predicated with `v0.t`, and the vsetvli
; preceding the vslideup to use the `ta, mu` policy (the one before the
; vslidedown stays `ta, ma`). Everything is at SEW=e64, LMUL=m1.
; NOTE(review): %mask presumably arrives in v0 and a0/a1 carry %evla/%evlb
; per the calling convention - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
41define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
42; CHECK-LABEL: test_vp_splice_v2i64_masked:
43; CHECK:       # %bb.0:
44; CHECK-NEXT:    addi a0, a0, -5
45; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
46; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
47; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
48; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
49; CHECK-NEXT:    ret
50  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
51  ret <2 x i64> %v
52}
53
; Unmasked vp.splice of two <4 x i32> vectors with a positive constant offset.
; The call passes offset 5, an all-true mask (splat (i1 1)) and the runtime
; lengths %evla / %evlb. The autogenerated checks expect:
;   - a0 decremented by 5, then used as the VL for `vslidedown.vi v8, v8, 5`;
;   - a1 used as the VL for `vslideup.vx v8, v9, a0`, both at SEW=e32, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm. The offset (5)
; is larger than the 4 lanes of the fixed vector type.
; The CHECK lines are generated by update_llc_test_checks.py.
54define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
55; CHECK-LABEL: test_vp_splice_v4i32:
56; CHECK:       # %bb.0:
57; CHECK-NEXT:    addi a0, a0, -5
58; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
59; CHECK-NEXT:    vslidedown.vi v8, v8, 5
60; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
61; CHECK-NEXT:    vslideup.vx v8, v9, a0
62; CHECK-NEXT:    ret
63
64  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
65  ret <4 x i32> %v
66}
67
; Unmasked vp.splice of two <4 x i32> vectors with a negative constant
; offset (-5) and an all-true mask. The autogenerated checks expect:
;   - a0 decremented by 5 and used as the register slide amount of
;     `vslidedown.vx v8, v8, a0`, executed with VL set to the immediate 5
;     (`vsetivli zero, 5`);
;   - a1 used as the VL for `vslideup.vi v8, v9, 5`, both at SEW=e32, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
68define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
69; CHECK-LABEL: test_vp_splice_v4i32_negative_offset:
70; CHECK:       # %bb.0:
71; CHECK-NEXT:    addi a0, a0, -5
72; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
73; CHECK-NEXT:    vslidedown.vx v8, v8, a0
74; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
75; CHECK-NEXT:    vslideup.vi v8, v9, 5
76; CHECK-NEXT:    ret
77
78  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
79  ret <4 x i32> %v
80}
81
; Masked vp.splice of two <4 x i32> vectors: offset 5 with a caller-supplied
; <4 x i1> %mask instead of an all-true mask. The autogenerated checks expect
; both slide instructions to be predicated with `v0.t`, and the vsetvli
; preceding the vslideup to use the `ta, mu` policy (the one before the
; vslidedown stays `ta, ma`). Everything is at SEW=e32, LMUL=m1.
; NOTE(review): %mask presumably arrives in v0 and a0/a1 carry %evla/%evlb
; per the calling convention - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
82define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
83; CHECK-LABEL: test_vp_splice_v4i32_masked:
84; CHECK:       # %bb.0:
85; CHECK-NEXT:    addi a0, a0, -5
86; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
87; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
88; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
89; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
90; CHECK-NEXT:    ret
91  %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
92  ret <4 x i32> %v
93}
94
; Unmasked vp.splice of two <8 x i16> vectors with a positive constant offset.
; The call passes offset 5, an all-true mask (splat (i1 1)) and the runtime
; lengths %evla / %evlb. The autogenerated checks expect:
;   - a0 decremented by 5, then used as the VL for `vslidedown.vi v8, v8, 5`;
;   - a1 used as the VL for `vslideup.vx v8, v9, a0`, both at SEW=e16, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
95define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
96; CHECK-LABEL: test_vp_splice_v8i16:
97; CHECK:       # %bb.0:
98; CHECK-NEXT:    addi a0, a0, -5
99; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
100; CHECK-NEXT:    vslidedown.vi v8, v8, 5
101; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
102; CHECK-NEXT:    vslideup.vx v8, v9, a0
103; CHECK-NEXT:    ret
104
105  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
106  ret <8 x i16> %v
107}
108
; Unmasked vp.splice of two <8 x i16> vectors with a negative constant
; offset (-5) and an all-true mask. The autogenerated checks expect:
;   - a0 decremented by 5 and used as the register slide amount of
;     `vslidedown.vx v8, v8, a0`, executed with VL set to the immediate 5
;     (`vsetivli zero, 5`);
;   - a1 used as the VL for `vslideup.vi v8, v9, 5`, both at SEW=e16, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
109define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
110; CHECK-LABEL: test_vp_splice_v8i16_negative_offset:
111; CHECK:       # %bb.0:
112; CHECK-NEXT:    addi a0, a0, -5
113; CHECK-NEXT:    vsetivli zero, 5, e16, m1, ta, ma
114; CHECK-NEXT:    vslidedown.vx v8, v8, a0
115; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
116; CHECK-NEXT:    vslideup.vi v8, v9, 5
117; CHECK-NEXT:    ret
118
119  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
120  ret <8 x i16> %v
121}
122
; Masked vp.splice of two <8 x i16> vectors: offset 5 with a caller-supplied
; <8 x i1> %mask instead of an all-true mask. The autogenerated checks expect
; both slide instructions to be predicated with `v0.t`, and the vsetvli
; preceding the vslideup to use the `ta, mu` policy (the one before the
; vslidedown stays `ta, ma`). Everything is at SEW=e16, LMUL=m1.
; NOTE(review): %mask presumably arrives in v0 and a0/a1 carry %evla/%evlb
; per the calling convention - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
123define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
124; CHECK-LABEL: test_vp_splice_v8i16_masked:
125; CHECK:       # %bb.0:
126; CHECK-NEXT:    addi a0, a0, -5
127; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
128; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
129; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
130; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
131; CHECK-NEXT:    ret
132  %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
133  ret <8 x i16> %v
134}
135
; Unmasked vp.splice of two <16 x i8> vectors with a positive constant offset.
; The call passes offset 5, an all-true mask (splat (i1 1)) and the runtime
; lengths %evla / %evlb. The autogenerated checks expect:
;   - a0 decremented by 5, then used as the VL for `vslidedown.vi v8, v8, 5`;
;   - a1 used as the VL for `vslideup.vx v8, v9, a0`, both at SEW=e8, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
136define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
137; CHECK-LABEL: test_vp_splice_v16i8:
138; CHECK:       # %bb.0:
139; CHECK-NEXT:    addi a0, a0, -5
140; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
141; CHECK-NEXT:    vslidedown.vi v8, v8, 5
142; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
143; CHECK-NEXT:    vslideup.vx v8, v9, a0
144; CHECK-NEXT:    ret
145
146  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
147  ret <16 x i8> %v
148}
149
; Unmasked vp.splice of two <16 x i8> vectors with a negative constant
; offset (-5) and an all-true mask. The autogenerated checks expect:
;   - a0 decremented by 5 and used as the register slide amount of
;     `vslidedown.vx v8, v8, a0`, executed with VL set to the immediate 5
;     (`vsetivli zero, 5`);
;   - a1 used as the VL for `vslideup.vi v8, v9, 5`, both at SEW=e8, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
150define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
151; CHECK-LABEL: test_vp_splice_v16i8_negative_offset:
152; CHECK:       # %bb.0:
153; CHECK-NEXT:    addi a0, a0, -5
154; CHECK-NEXT:    vsetivli zero, 5, e8, m1, ta, ma
155; CHECK-NEXT:    vslidedown.vx v8, v8, a0
156; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
157; CHECK-NEXT:    vslideup.vi v8, v9, 5
158; CHECK-NEXT:    ret
159
160  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
161  ret <16 x i8> %v
162}
163
; Masked vp.splice of two <16 x i8> vectors: offset 5 with a caller-supplied
; <16 x i1> %mask instead of an all-true mask. The autogenerated checks expect
; both slide instructions to be predicated with `v0.t`, and the vsetvli
; preceding the vslideup to use the `ta, mu` policy (the one before the
; vslidedown stays `ta, ma`). Everything is at SEW=e8, LMUL=m1.
; NOTE(review): %mask presumably arrives in v0 and a0/a1 carry %evla/%evlb
; per the calling convention - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
164define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
165; CHECK-LABEL: test_vp_splice_v16i8_masked:
166; CHECK:       # %bb.0:
167; CHECK-NEXT:    addi a0, a0, -5
168; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
169; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
170; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
171; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
172; CHECK-NEXT:    ret
173  %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb)
174  ret <16 x i8> %v
175}
176
; Unmasked vp.splice of two <2 x double> vectors with a positive constant
; offset. The call passes offset 5, an all-true mask (splat (i1 1)) and the
; runtime lengths %evla / %evlb. The autogenerated checks expect the same
; instruction sequence as the <2 x i64> integer test in this file:
;   - a0 decremented by 5, then used as the VL for `vslidedown.vi v8, v8, 5`;
;   - a1 used as the VL for `vslideup.vx v8, v9, a0`, both at SEW=e64, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm. The offset (5)
; is larger than the 2 lanes of the fixed vector type.
; The CHECK lines are generated by update_llc_test_checks.py.
177define <2 x double> @test_vp_splice_v2f64(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
178; CHECK-LABEL: test_vp_splice_v2f64:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    addi a0, a0, -5
181; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
182; CHECK-NEXT:    vslidedown.vi v8, v8, 5
183; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
184; CHECK-NEXT:    vslideup.vx v8, v9, a0
185; CHECK-NEXT:    ret
186
187  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
188  ret <2 x double> %v
189}
190
; Unmasked vp.splice of two <2 x double> vectors with a negative constant
; offset (-5) and an all-true mask. The autogenerated checks expect:
;   - a0 decremented by 5 and used as the register slide amount of
;     `vslidedown.vx v8, v8, a0`, executed with VL set to the immediate 5
;     (`vsetivli zero, 5`);
;   - a1 used as the VL for `vslideup.vi v8, v9, 5`, both at SEW=e64, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
191define <2 x double> @test_vp_splice_v2f64_negative_offset(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
192; CHECK-LABEL: test_vp_splice_v2f64_negative_offset:
193; CHECK:       # %bb.0:
194; CHECK-NEXT:    addi a0, a0, -5
195; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
196; CHECK-NEXT:    vslidedown.vx v8, v8, a0
197; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
198; CHECK-NEXT:    vslideup.vi v8, v9, 5
199; CHECK-NEXT:    ret
200
201  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
202  ret <2 x double> %v
203}
204
; Masked vp.splice of two <2 x double> vectors: offset 5 with a
; caller-supplied <2 x i1> %mask instead of an all-true mask. The
; autogenerated checks expect both slide instructions to be predicated with
; `v0.t`, and the vsetvli preceding the vslideup to use the `ta, mu` policy
; (the one before the vslidedown stays `ta, ma`). Everything is at SEW=e64,
; LMUL=m1.
; NOTE(review): %mask presumably arrives in v0 and a0/a1 carry %evla/%evlb
; per the calling convention - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
205define <2 x double> @test_vp_splice_v2f64_masked(<2 x double> %va, <2 x double> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
206; CHECK-LABEL: test_vp_splice_v2f64_masked:
207; CHECK:       # %bb.0:
208; CHECK-NEXT:    addi a0, a0, -5
209; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
210; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
211; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
212; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
213; CHECK-NEXT:    ret
214  %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
215  ret <2 x double> %v
216}
217
; Unmasked vp.splice of two <4 x float> vectors with a positive constant
; offset. The call passes offset 5, an all-true mask (splat (i1 1)) and the
; runtime lengths %evla / %evlb. The autogenerated checks expect the same
; instruction sequence as the <4 x i32> integer test in this file:
;   - a0 decremented by 5, then used as the VL for `vslidedown.vi v8, v8, 5`;
;   - a1 used as the VL for `vslideup.vx v8, v9, a0`, both at SEW=e32, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm. The offset (5)
; is larger than the 4 lanes of the fixed vector type.
; The CHECK lines are generated by update_llc_test_checks.py.
218define <4 x float> @test_vp_splice_v4f32(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
219; CHECK-LABEL: test_vp_splice_v4f32:
220; CHECK:       # %bb.0:
221; CHECK-NEXT:    addi a0, a0, -5
222; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
223; CHECK-NEXT:    vslidedown.vi v8, v8, 5
224; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
225; CHECK-NEXT:    vslideup.vx v8, v9, a0
226; CHECK-NEXT:    ret
227
228  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
229  ret <4 x float> %v
230}
231
; Unmasked vp.splice of two <4 x float> vectors with a negative constant
; offset (-5) and an all-true mask. The autogenerated checks expect:
;   - a0 decremented by 5 and used as the register slide amount of
;     `vslidedown.vx v8, v8, a0`, executed with VL set to the immediate 5
;     (`vsetivli zero, 5`);
;   - a1 used as the VL for `vslideup.vi v8, v9, 5`, both at SEW=e32, m1.
; NOTE(review): a0/a1 presumably carry %evla/%evlb - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
232define <4 x float> @test_vp_splice_v4f32_negative_offset(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
233; CHECK-LABEL: test_vp_splice_v4f32_negative_offset:
234; CHECK:       # %bb.0:
235; CHECK-NEXT:    addi a0, a0, -5
236; CHECK-NEXT:    vsetivli zero, 5, e32, m1, ta, ma
237; CHECK-NEXT:    vslidedown.vx v8, v8, a0
238; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
239; CHECK-NEXT:    vslideup.vi v8, v9, 5
240; CHECK-NEXT:    ret
241
242  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
243  ret <4 x float> %v
244}
245
; Masked vp.splice of two <4 x float> vectors: offset 5 with a
; caller-supplied <4 x i1> %mask instead of an all-true mask. The
; autogenerated checks expect both slide instructions to be predicated with
; `v0.t`, and the vsetvli preceding the vslideup to use the `ta, mu` policy
; (the one before the vslidedown stays `ta, ma`). Everything is at SEW=e32,
; LMUL=m1.
; NOTE(review): %mask presumably arrives in v0 and a0/a1 carry %evla/%evlb
; per the calling convention - confirm.
; The CHECK lines are generated by update_llc_test_checks.py.
246define <4 x float> @test_vp_splice_v4f32_masked(<4 x float> %va, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
247; CHECK-LABEL: test_vp_splice_v4f32_masked:
248; CHECK:       # %bb.0:
249; CHECK-NEXT:    addi a0, a0, -5
250; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
251; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
252; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
253; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
254; CHECK-NEXT:    ret
255  %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
256  ret <4 x float> %v
257}
258