; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin | FileCheck %s

; Integers

define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    srli a0, a0, 2
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmerge.vim v12, v10, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v14, v10, 1, v0
; CHECK-NEXT:    vnsrl.wi v8, v12, 0
; CHECK-NEXT:    vnsrl.wi v10, v12, 8
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    vmsne.vi v8, v10, 0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
  ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
}
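
; Masks cannot feed vnsrl directly, so the lowering above first widens each
; i1 half to an e8 vector of 0/1 with vmerge.vim, splits even and odd bytes
; with vnsrl.wi (shift 0 and shift 8), and converts the results back to
; masks with vmsne.vi.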

define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vnsrl.wi v14, v8, 8
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    vmv.v.v v10, v14
; CHECK-NEXT:    ret
  %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
  ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
}
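
; For non-mask types deinterleave2 is two narrowing shifts: the source is
; viewed as elements of twice the width, vnsrl.wi by 0 keeps the even
; elements, and vnsrl.wi by the element size (8 bits here) keeps the odd
; ones.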

define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vnsrl.wi v14, v8, 16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    vmv.v.v v10, v14
; CHECK-NEXT:    ret
  %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
  ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
}

define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv8i32(<vscale x 8 x i32> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wx v12, v8, a0
; CHECK-NEXT:    vnsrl.wi v14, v8, 0
; CHECK-NEXT:    vmv.v.v v8, v14
; CHECK-NEXT:    vmv.v.v v10, v12
; CHECK-NEXT:    ret
  %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
}
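
; With e32 elements the odd shift amount is 32, which does not fit the
; 5-bit immediate of vnsrl.wi, so it is loaded into a register and used
; through vnsrl.wx.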

define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v16, a0
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vmv.v.x v17, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vcompress.vm v12, v8, v16
; CHECK-NEXT:    vcompress.vm v20, v8, v17
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    vmv2r.v v10, v20
; CHECK-NEXT:    ret
  %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
  ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}
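
; For e64 there is no doubled element width to narrow from, so vnsrl is not
; usable; the even and odd elements are instead gathered by vcompress under
; the alternating masks 85 (0b01010101) and 170 (0b10101010).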

define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.x v24, a0
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vmv.v.x v25, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vcompress.vm v16, v8, v24
; CHECK-NEXT:    vcompress.vm v0, v8, v25
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    vmv4r.v v12, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
  ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
}

declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)

define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv128i1(<vscale x 128 x i1> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vmv.v.i v24, 0
; CHECK-NEXT:    vmerge.vim v16, v24, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v24, v24, 1, v0
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v16, 0
; CHECK-NEXT:    vnsrl.wi v0, v16, 8
; CHECK-NEXT:    vnsrl.wi v12, v24, 0
; CHECK-NEXT:    vnsrl.wi v4, v24, 8
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vmsne.vi v16, v8, 0
; CHECK-NEXT:    vmsne.vi v8, v0, 0
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    ret
  %retval = call {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.vector.deinterleave2.nxv128i1(<vscale x 128 x i1> %vec)
  ret {<vscale x 64 x i1>, <vscale x 64 x i1>} %retval
}
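
; Same strategy as the nxv32i1 case at the maximum LMUL; v0 has to be
; shuffled with vmv1r.v because it serves both as the merge mask and as the
; first result register.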

define {<vscale x 64 x i8>, <vscale x 64 x i8>} @vector_deinterleave_nxv64i8_nxv128i8(<vscale x 128 x i8> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv64i8_nxv128i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vnsrl.wi v8, v24, 0
; CHECK-NEXT:    vnsrl.wi v0, v24, 8
; CHECK-NEXT:    vnsrl.wi v12, v16, 0
; CHECK-NEXT:    vnsrl.wi v4, v16, 8
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 64 x i8>, <vscale x 64 x i8>} @llvm.vector.deinterleave2.nxv128i8(<vscale x 128 x i8> %vec)
  ret {<vscale x 64 x i8>, <vscale x 64 x i8>} %retval
}

define {<vscale x 32 x i16>, <vscale x 32 x i16>} @vector_deinterleave_nxv32i16_nxv64i16(<vscale x 64 x i16> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32i16_nxv64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vnsrl.wi v8, v24, 0
; CHECK-NEXT:    vnsrl.wi v0, v24, 16
; CHECK-NEXT:    vnsrl.wi v12, v16, 0
; CHECK-NEXT:    vnsrl.wi v4, v16, 16
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 32 x i16>, <vscale x 32 x i16>} @llvm.vector.deinterleave2.nxv64i16(<vscale x 64 x i16> %vec)
  ret {<vscale x 32 x i16>, <vscale x 32 x i16>} %retval
}

define {<vscale x 16 x i32>, <vscale x 16 x i32>} @vector_deinterleave_nxv16i32_nxv32i32(<vscale x 32 x i32> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i32_nxv32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v16
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vnsrl.wx v20, v24, a0
; CHECK-NEXT:    vnsrl.wx v16, v8, a0
; CHECK-NEXT:    vnsrl.wi v0, v8, 0
; CHECK-NEXT:    vnsrl.wi v4, v24, 0
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 16 x i32>, <vscale x 16 x i32>} @llvm.vector.deinterleave2.nxv32i32(<vscale x 32 x i32> %vec)
  ret {<vscale x 16 x i32>, <vscale x 16 x i32>} %retval
}

define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv16i64(<vscale x 16 x i64> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.x v7, a0
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vmv.v.x v6, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vcompress.vm v24, v8, v7
; CHECK-NEXT:    vmv1r.v v28, v7
; CHECK-NEXT:    vmv1r.v v29, v6
; CHECK-NEXT:    vcompress.vm v0, v8, v29
; CHECK-NEXT:    vcompress.vm v8, v16, v28
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vcompress.vm v8, v16, v29
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmv4r.v v28, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmv4r.v v4, v8
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
  ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
}
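
; Four m8 vcompress results plus the two m8 sources cannot coexist in the
; 32 vector registers, so two of the results are spilled to a 16*vlenb-byte
; stack region; the .cfi_escape encodes that vlenb-scaled frame size for
; the unwinder.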

declare {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.vector.deinterleave2.nxv128i1(<vscale x 128 x i1>)
declare {<vscale x 64 x i8>, <vscale x 64 x i8>} @llvm.vector.deinterleave2.nxv128i8(<vscale x 128 x i8>)
declare {<vscale x 32 x i16>, <vscale x 32 x i16>} @llvm.vector.deinterleave2.nxv64i16(<vscale x 64 x i16>)
declare {<vscale x 16 x i32>, <vscale x 16 x i32>} @llvm.vector.deinterleave2.nxv32i32(<vscale x 32 x i32>)
declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64>)

; Floats

define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vnsrl.wi v9, v8, 16
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
  ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
}
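
; Floating-point deinterleaves only shuffle bit patterns, so they lower the
; same way as the equally sized integer cases: vnsrl for e16 and e32,
; vcompress for e64.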

define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vnsrl.wi v9, v8, 16
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
  ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
}

define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vnsrl.wi v11, v8, 16
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    vmv.v.v v9, v11
; CHECK-NEXT:    ret
  %retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
  ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
}

define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vnsrl.wi v11, v8, 16
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    vmv.v.v v9, v11
; CHECK-NEXT:    ret
  %retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
  ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
}

define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wx v10, v8, a0
; CHECK-NEXT:    vnsrl.wi v11, v8, 0
; CHECK-NEXT:    vmv.v.v v8, v11
; CHECK-NEXT:    vmv.v.v v9, v10
; CHECK-NEXT:    ret
  %retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
  ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
}

define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vnsrl.wi v14, v8, 16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    vmv.v.v v10, v14
; CHECK-NEXT:    ret
  %retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
  ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
}

define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vnsrl.wi v12, v8, 0
; CHECK-NEXT:    vnsrl.wi v14, v8, 16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    vmv.v.v v10, v14
; CHECK-NEXT:    ret
  %retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
  ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
}

define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vnsrl.wx v12, v8, a0
; CHECK-NEXT:    vnsrl.wi v14, v8, 0
; CHECK-NEXT:    vmv.v.v v8, v14
; CHECK-NEXT:    vmv.v.v v10, v12
; CHECK-NEXT:    ret
  %retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
  ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
}

define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v16, a0
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vmv.v.x v17, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vcompress.vm v12, v8, v16
; CHECK-NEXT:    vcompress.vm v20, v8, v17
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    vmv2r.v v10, v20
; CHECK-NEXT:    ret
  %retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
  ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
}
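
; As with i64, e64 doubles have no wider type for vnsrl and fall back to the
; vcompress sequence with masks 85 and 170.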

declare {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)

define {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @vector_deinterleave_nxv32bf16_nxv64bf16(<vscale x 64 x bfloat> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vnsrl.wi v8, v24, 0
; CHECK-NEXT:    vnsrl.wi v0, v24, 16
; CHECK-NEXT:    vnsrl.wi v12, v16, 0
; CHECK-NEXT:    vnsrl.wi v4, v16, 16
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @llvm.vector.deinterleave2.nxv64bf16(<vscale x 64 x bfloat> %vec)
  ret {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} %retval
}

define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f16_nxv64f16(<vscale x 64 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    vnsrl.wi v8, v24, 0
; CHECK-NEXT:    vnsrl.wi v0, v24, 16
; CHECK-NEXT:    vnsrl.wi v12, v16, 0
; CHECK-NEXT:    vnsrl.wi v4, v16, 16
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 32 x half>, <vscale x 32 x half>} @llvm.vector.deinterleave2.nxv64f16(<vscale x 64 x half> %vec)
  ret {<vscale x 32 x half>, <vscale x 32 x half>} %retval
}

define {<vscale x 16 x float>, <vscale x 16 x float>} @vector_deinterleave_nxv16f32_nxv32f32(<vscale x 32 x float> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16f32_nxv32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv8r.v v24, v16
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vnsrl.wx v20, v24, a0
; CHECK-NEXT:    vnsrl.wx v16, v8, a0
; CHECK-NEXT:    vnsrl.wi v0, v8, 0
; CHECK-NEXT:    vnsrl.wi v4, v24, 0
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 16 x float>, <vscale x 16 x float>} @llvm.vector.deinterleave2.nxv32f32(<vscale x 32 x float> %vec)
  ret {<vscale x 16 x float>, <vscale x 16 x float>} %retval
}

define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f64_nxv16f64(<vscale x 16 x double> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8f64_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.x v7, a0
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vmv.v.x v6, a0
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vcompress.vm v24, v8, v7
; CHECK-NEXT:    vmv1r.v v28, v7
; CHECK-NEXT:    vmv1r.v v29, v6
; CHECK-NEXT:    vcompress.vm v0, v8, v29
; CHECK-NEXT:    vcompress.vm v8, v16, v28
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vcompress.vm v8, v16, v29
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmv4r.v v28, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmv4r.v v4, v8
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    vmv8r.v v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %retval = call {<vscale x 8 x double>, <vscale x 8 x double>} @llvm.vector.deinterleave2.nxv16f64(<vscale x 16 x double> %vec)
  ret {<vscale x 8 x double>, <vscale x 8 x double>} %retval
}

declare {<vscale x 32 x half>, <vscale x 32 x half>} @llvm.vector.deinterleave2.nxv64f16(<vscale x 64 x half>)
declare {<vscale x 16 x float>, <vscale x 16 x float>} @llvm.vector.deinterleave2.nxv32f32(<vscale x 32 x float>)
declare {<vscale x 8 x double>, <vscale x 8 x double>} @llvm.vector.deinterleave2.nxv16f64(<vscale x 16 x double>)