; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB

; Integers

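; The CHECK lines below expect a widening zip: vwaddu.vv computes
; zext(a) + zext(b) into 2*SEW-wide elements, and vwmaccu.vx with -1
; (i.e. 2^SEW-1) then adds b * (2^SEW-1), leaving a in the low half and b in
; the high half of each widened element, which is the interleaved order.
; With Zvbb, vwsll shifts b into the high half and vwaddu.wv adds a. i64
; elements cannot be widened, so those cases expect a masked vrgatherei16
; shuffle instead.
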
define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: vector_interleave_nxv32i1_nxv16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vmerge.vim v12, v10, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    srli a1, a1, 2
; CHECK-NEXT:    vwaddu.vv v16, v8, v12
; CHECK-NEXT:    vwmaccu.vx v16, a0, v12
; CHECK-NEXT:    vmsne.vi v8, v18, 0
; CHECK-NEXT:    vmsne.vi v0, v16, 0
; CHECK-NEXT:    add a0, a1, a1
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vx v0, v8, a1
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; ZVBB-NEXT:    vmv1r.v v9, v0
; ZVBB-NEXT:    vmv1r.v v0, v8
; ZVBB-NEXT:    vmv.v.i v10, 0
; ZVBB-NEXT:    li a0, 1
; ZVBB-NEXT:    csrr a1, vlenb
; ZVBB-NEXT:    vmerge.vim v10, v10, 1, v0
; ZVBB-NEXT:    srli a1, a1, 2
; ZVBB-NEXT:    vwsll.vi v12, v10, 8
; ZVBB-NEXT:    vmv1r.v v0, v9
; ZVBB-NEXT:    vwaddu.wx v12, v12, a0, v0.t
; ZVBB-NEXT:    vmsne.vi v8, v14, 0
; ZVBB-NEXT:    vmsne.vi v0, v12, 0
; ZVBB-NEXT:    add a0, a1, a1
; ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; ZVBB-NEXT:    vslideup.vx v0, v8, a1
; ZVBB-NEXT:    ret
  %res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 32 x i1> %res
}

define <vscale x 32 x i8> @vector_interleave_nxv32i8_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: vector_interleave_nxv32i8_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv32i8_nxv16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; ZVBB-NEXT:    vwsll.vi v12, v10, 8
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 32 x i8> %res
}

define <vscale x 16 x i16> @vector_interleave_nxv16i16_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: vector_interleave_nxv16i16_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv16i16_nxv8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVBB-NEXT:    vwsll.vi v12, v10, 16
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 16 x i16> %res
}

define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVBB-NEXT:    vwsll.vx v12, v10, a0
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 8 x i32> %res
}

define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: vector_interleave_nxv4i64_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    vand.vi v13, v12, 1
; CHECK-NEXT:    vmsne.vi v0, v13, 0
; CHECK-NEXT:    vsrl.vi v16, v12, 1
; CHECK-NEXT:    vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv4i64_nxv2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    csrr a0, vlenb
; ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; ZVBB-NEXT:    vid.v v12
; ZVBB-NEXT:    srli a0, a0, 2
; ZVBB-NEXT:    vand.vi v13, v12, 1
; ZVBB-NEXT:    vmsne.vi v0, v13, 0
; ZVBB-NEXT:    vsrl.vi v16, v12, 1
; ZVBB-NEXT:    vadd.vx v16, v16, a0, v0.t
; ZVBB-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; ZVBB-NEXT:    vrgatherei16.vv v12, v8, v16
; ZVBB-NEXT:    vmv.v.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i64> %res
}

declare <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
; CHECK-LABEL: vector_interleave_nxv128i1_nxv64i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmv.v.i v24, 0
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vmerge.vim v16, v24, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vmerge.vim v24, v24, 1, v0
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vmsne.vi v16, v8, 0
; CHECK-NEXT:    vmsne.vi v8, v0, 0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv128i1_nxv64i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; ZVBB-NEXT:    vmv.v.i v24, 0
; ZVBB-NEXT:    vmerge.vim v16, v24, 1, v0
; ZVBB-NEXT:    vmv1r.v v0, v8
; ZVBB-NEXT:    vmerge.vim v24, v24, 1, v0
; ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; ZVBB-NEXT:    vwsll.vi v8, v24, 8
; ZVBB-NEXT:    vwsll.vi v0, v28, 8
; ZVBB-NEXT:    vwaddu.wv v8, v8, v16
; ZVBB-NEXT:    vwaddu.wv v0, v0, v20
; ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; ZVBB-NEXT:    vmsne.vi v16, v8, 0
; ZVBB-NEXT:    vmsne.vi v8, v0, 0
; ZVBB-NEXT:    vmv1r.v v0, v16
; ZVBB-NEXT:    ret
  %res = call <vscale x 128 x i1> @llvm.vector.interleave2.nxv128i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b)
  ret <vscale x 128 x i1> %res
}

define <vscale x 128 x i8> @vector_interleave_nxv128i8_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b) {
; CHECK-LABEL: vector_interleave_nxv128i8_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv128i8_nxv64i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    vwsll.vi v8, v16, 8
; ZVBB-NEXT:    vwsll.vi v0, v20, 8
; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
; ZVBB-NEXT:    vmv8r.v v16, v0
; ZVBB-NEXT:    ret
  %res = call <vscale x 128 x i8> @llvm.vector.interleave2.nxv128i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b)
  ret <vscale x 128 x i8> %res
}

define <vscale x 64 x i16> @vector_interleave_nxv64i16_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
; CHECK-LABEL: vector_interleave_nxv64i16_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv64i16_nxv32i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    vwsll.vi v8, v16, 16
; ZVBB-NEXT:    vwsll.vi v0, v20, 16
; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
; ZVBB-NEXT:    vmv8r.v v16, v0
; ZVBB-NEXT:    ret
  %res = call <vscale x 64 x i16> @llvm.vector.interleave2.nxv64i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b)
  ret <vscale x 64 x i16> %res
}

define <vscale x 32 x i32> @vector_interleave_nxv32i32_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
; CHECK-LABEL: vector_interleave_nxv32i32_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv32i32_nxv16i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vwsll.vx v8, v16, a0
; ZVBB-NEXT:    vwsll.vx v0, v20, a0
; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
; ZVBB-NEXT:    vmv8r.v v16, v0
; ZVBB-NEXT:    ret
  %res = call <vscale x 32 x i32> @llvm.vector.interleave2.nxv32i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b)
  ret <vscale x 32 x i32> %res
}

define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
; CHECK-LABEL: vector_interleave_nxv16i64_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v6
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vmv4r.v v28, v16
; CHECK-NEXT:    vmv4r.v v16, v12
; CHECK-NEXT:    vand.vi v8, v6, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsrl.vi v6, v6, 1
; CHECK-NEXT:    vadd.vx v6, v6, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v24, v6
; CHECK-NEXT:    vrgatherei16.vv v24, v16, v6
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv16i64_nxv8i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    csrr a0, vlenb
; ZVBB-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; ZVBB-NEXT:    vid.v v6
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    srli a0, a0, 1
; ZVBB-NEXT:    vmv4r.v v28, v16
; ZVBB-NEXT:    vmv4r.v v16, v12
; ZVBB-NEXT:    vand.vi v8, v6, 1
; ZVBB-NEXT:    vmsne.vi v0, v8, 0
; ZVBB-NEXT:    vsrl.vi v6, v6, 1
; ZVBB-NEXT:    vadd.vx v6, v6, a0, v0.t
; ZVBB-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; ZVBB-NEXT:    vrgatherei16.vv v8, v24, v6
; ZVBB-NEXT:    vrgatherei16.vv v24, v16, v6
; ZVBB-NEXT:    vmv.v.v v16, v24
; ZVBB-NEXT:    ret
  %res = call <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b)
  ret <vscale x 16 x i64> %res
}

declare <vscale x 128 x i1> @llvm.vector.interleave2.nxv128i1(<vscale x 64 x i1>, <vscale x 64 x i1>)
declare <vscale x 128 x i8> @llvm.vector.interleave2.nxv128i8(<vscale x 64 x i8>, <vscale x 64 x i8>)
declare <vscale x 64 x i16> @llvm.vector.interleave2.nxv64i16(<vscale x 32 x i16>, <vscale x 32 x i16>)
declare <vscale x 32 x i32> @llvm.vector.interleave2.nxv32i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
declare <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64>, <vscale x 8 x i64>)

; Floats

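; The floating-point cases expect the same lowering as the integer cases
; above, applied to the raw bits: vwaddu.vv/vwmaccu.vx (vwsll/vwaddu.wv with
; Zvbb) for SEW < 64 and vrgatherei16 for f64.
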
define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
; CHECK-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    srli a1, a1, 2
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v10, a1
; CHECK-NEXT:    add a0, a1, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v10, v8, a1
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    csrr a0, vlenb
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    srli a0, a0, 2
; ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT:    vslidedown.vx v8, v10, a0
; ZVBB-NEXT:    add a1, a0, a0
; ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT:    vslideup.vx v10, v8, a0
; ZVBB-NEXT:    vmv.v.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
; CHECK-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv4f16_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    srli a1, a1, 2
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v10, a1
; CHECK-NEXT:    add a0, a1, a1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v10, v8, a1
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv4f16_nxv2f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    csrr a0, vlenb
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    srli a0, a0, 2
; ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT:    vslidedown.vx v8, v10, a0
; ZVBB-NEXT:    add a1, a0, a0
; ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT:    vslideup.vx v10, v8, a0
; ZVBB-NEXT:    vmv.v.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 4 x half> %res
}

define <vscale x 8 x half> @vector_interleave_nxv8f16_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv8f16_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv8f16_nxv4f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: vector_interleave_nxv4f32_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv4f32_nxv2f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVBB-NEXT:    vwsll.vx v10, v9, a0
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 4 x float> %res
}

define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
; CHECK-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVBB-NEXT:    vwsll.vi v12, v10, 16
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 16 x bfloat> %res
}

define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv16f16_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv16f16_nxv8f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVBB-NEXT:    vwsll.vi v12, v10, 16
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 16 x half> %res
}

define <vscale x 8 x float> @vector_interleave_nxv8f32_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: vector_interleave_nxv8f32_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv8f32_nxv4f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVBB-NEXT:    vwsll.vx v12, v10, a0
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 8 x float> %res
}

define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: vector_interleave_nxv4f64_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    vand.vi v13, v12, 1
; CHECK-NEXT:    vmsne.vi v0, v13, 0
; CHECK-NEXT:    vsrl.vi v16, v12, 1
; CHECK-NEXT:    vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv4f64_nxv2f64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    csrr a0, vlenb
; ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
; ZVBB-NEXT:    vid.v v12
; ZVBB-NEXT:    srli a0, a0, 2
; ZVBB-NEXT:    vand.vi v13, v12, 1
; ZVBB-NEXT:    vmsne.vi v0, v13, 0
; ZVBB-NEXT:    vsrl.vi v16, v12, 1
; ZVBB-NEXT:    vadd.vx v16, v16, a0, v0.t
; ZVBB-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; ZVBB-NEXT:    vrgatherei16.vv v12, v8, v16
; ZVBB-NEXT:    vmv.v.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 4 x double> %res
}


declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)

define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
; CHECK-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    vwsll.vi v8, v16, 16
; ZVBB-NEXT:    vwsll.vi v0, v20, 16
; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
; ZVBB-NEXT:    vmv8r.v v16, v0
; ZVBB-NEXT:    ret
  %res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
  ret <vscale x 64 x bfloat> %res
}

define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv64f16_nxv32f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    vwsll.vi v8, v16, 16
; ZVBB-NEXT:    vwsll.vi v0, v20, 16
; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
; ZVBB-NEXT:    vmv8r.v v16, v0
; ZVBB-NEXT:    ret
  %res = call <vscale x 64 x half> @llvm.vector.interleave2.nxv64f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
  ret <vscale x 64 x half> %res
}

define <vscale x 32 x float> @vector_interleave_nxv32f32_nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b) {
; CHECK-LABEL: vector_interleave_nxv32f32_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vwaddu.vv v8, v24, v16
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwaddu.vv v0, v28, v20
; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv32f32_nxv16f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vwsll.vx v8, v16, a0
; ZVBB-NEXT:    vwsll.vx v0, v20, a0
; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
; ZVBB-NEXT:    vmv8r.v v16, v0
; ZVBB-NEXT:    ret
  %res = call <vscale x 32 x float> @llvm.vector.interleave2.nxv32f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
  ret <vscale x 32 x float> %res
}

define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b) {
; CHECK-LABEL: vector_interleave_nxv16f64_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v6
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vmv4r.v v28, v16
; CHECK-NEXT:    vmv4r.v v16, v12
; CHECK-NEXT:    vand.vi v8, v6, 1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vsrl.vi v6, v6, 1
; CHECK-NEXT:    vadd.vx v6, v6, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v24, v6
; CHECK-NEXT:    vrgatherei16.vv v24, v16, v6
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv16f64_nxv8f64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    csrr a0, vlenb
; ZVBB-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; ZVBB-NEXT:    vid.v v6
; ZVBB-NEXT:    vmv8r.v v24, v8
; ZVBB-NEXT:    srli a0, a0, 1
; ZVBB-NEXT:    vmv4r.v v28, v16
; ZVBB-NEXT:    vmv4r.v v16, v12
; ZVBB-NEXT:    vand.vi v8, v6, 1
; ZVBB-NEXT:    vmsne.vi v0, v8, 0
; ZVBB-NEXT:    vsrl.vi v6, v6, 1
; ZVBB-NEXT:    vadd.vx v6, v6, a0, v0.t
; ZVBB-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; ZVBB-NEXT:    vrgatherei16.vv v8, v24, v6
; ZVBB-NEXT:    vrgatherei16.vv v24, v16, v6
; ZVBB-NEXT:    vmv.v.v v16, v24
; ZVBB-NEXT:    ret
  %res = call <vscale x 16 x double> @llvm.vector.interleave2.nxv16f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
  ret <vscale x 16 x double> %res
}

define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison(<vscale x 4 x i32> %a) {
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v12, v8
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; ZVBB-NEXT:    vzext.vf2 v12, v8
; ZVBB-NEXT:    vmv.v.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> poison)
  ret <vscale x 8 x i32> %res
}

define <vscale x 8 x i32> @vector_interleave_nxv8i32_nxv4i32_poison2(<vscale x 4 x i32> %a) {
; CHECK-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v12, v8
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsll.vx v8, v12, a0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_nxv8i32_nxv4i32_poison2:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVBB-NEXT:    vwsll.vx v12, v8, a0
; ZVBB-NEXT:    vmv4r.v v8, v12
; ZVBB-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a)
  ret <vscale x 8 x i32> %res
}

declare <vscale x 64 x half> @llvm.vector.interleave2.nxv64f16(<vscale x 32 x half>, <vscale x 32 x half>)
declare <vscale x 32 x float> @llvm.vector.interleave2.nxv32f32(<vscale x 16 x float>, <vscale x 16 x float>)
declare <vscale x 16 x double> @llvm.vector.interleave2.nxv16f64(<vscale x 8 x double>, <vscale x 8 x double>)