; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK,RV64 %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB

; Integers
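;
; Note on the expected lowering (element widths below 64 bits): without Zvbb,
; vwaddu.vv computes zext(a) + zext(b) and vwmaccu.vx with -1 then adds
; (2^SEW - 1) * b, so each widened element holds a + (b << SEW), i.e. the first
; source in the low half and the second in the high half. With Zvbb, vwsll
; shifts the second source into the high half directly and vwaddu.wv adds in
; the zero-extended first source.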

define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; CHECK-LABEL: vector_interleave_v32i1_v16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v8, 2
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 16
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vwmaccu.vx v12, a1, v10
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmsne.vi v0, v12, 0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v32i1_v16i1:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVBB-NEXT:    vslideup.vi v0, v8, 2
; ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; ZVBB-NEXT:    vmv.v.i v8, 0
; ZVBB-NEXT:    vmerge.vim v8, v8, 1, v0
; ZVBB-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; ZVBB-NEXT:    vslidedown.vi v10, v8, 16
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vwsll.vi v12, v10, 8
; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
; ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; ZVBB-NEXT:    vmsne.vi v0, v12, 0
; ZVBB-NEXT:    ret
  %res = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
  ret <32 x i1> %res
}

define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vector_interleave_v16i16_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v16i16_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i16> %res
}

define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vector_interleave_v8i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v8i32_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vwsll.vx v10, v9, a0
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i32> %res
}

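; For 64-bit elements (i64 and f64) there is no wider element type to widen
; into, so the two sources are slid together into one register group and
; interleaved with vrgatherei16. The constant 0x3010200 (lui 12304 + addi 512)
; supplies the byte indices {0, 2, 1, 3}, which vsext.vf2 widens to the i16
; gather indices.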
define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vector_interleave_v4i64_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    lui a0, 12304
; CHECK-NEXT:    addi a0, a0, 512
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v4i64_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vmv1r.v v10, v9
; ZVBB-NEXT:    lui a0, 12304
; ZVBB-NEXT:    addi a0, a0, 512
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    vmv.s.x v10, a0
; ZVBB-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVBB-NEXT:    vsext.vf2 v12, v10
; ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; ZVBB-NEXT:    vrgatherei16.vv v10, v8, v12
; ZVBB-NEXT:    vmv.v.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
  ret <4 x i64> %res
}

declare <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1>, <16 x i1>)
declare <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)

; Floats
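;
; The floating-point cases are expected to reuse the same lowerings as the
; integer cases of the same element width, since interleaving only moves bit
; patterns; f64 takes the vrgatherei16 path like i64 above.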

define <4 x half> @vector_interleave_v4f16_v2f16(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: vector_interleave_v4f16_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v4f16_v2f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv1r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %a, <2 x half> %b)
  ret <4 x half> %res
}

define <8 x half> @vector_interleave_v8f16_v4f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: vector_interleave_v8f16_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v8f16_v4f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv1r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %a, <4 x half> %b)
  ret <8 x half> %res
}

define <4 x float> @vector_interleave_v4f32_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: vector_interleave_v4f32_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v4f32_v2f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; ZVBB-NEXT:    vwsll.vx v10, v9, a0
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv1r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %a, <2 x float> %b)
  ret <4 x float> %res
}

define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vector_interleave_v16f16_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v16f16_v8f16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vwsll.vi v10, v9, 16
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
  ret <16 x half> %res
}

define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vector_interleave_v8f32_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v8f32_v4f32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a0, 32
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vwsll.vx v10, v9, a0
; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
; ZVBB-NEXT:    vmv2r.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
  ret <8 x float> %res
}

define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: vector_interleave_v4f64_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    lui a0, 12304
; CHECK-NEXT:    addi a0, a0, 512
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: vector_interleave_v4f64_v2f64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vmv1r.v v10, v9
; ZVBB-NEXT:    lui a0, 12304
; ZVBB-NEXT:    addi a0, a0, 512
; ZVBB-NEXT:    vslideup.vi v8, v10, 2
; ZVBB-NEXT:    vmv.s.x v10, a0
; ZVBB-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVBB-NEXT:    vsext.vf2 v12, v10
; ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; ZVBB-NEXT:    vrgatherei16.vv v10, v8, v12
; ZVBB-NEXT:    vmv.v.v v8, v10
; ZVBB-NEXT:    ret
  %res = call <4 x double> @llvm.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
  ret <4 x double> %res
}


declare <4 x half> @llvm.vector.interleave2.v4f16(<2 x half>, <2 x half>)
declare <8 x half> @llvm.vector.interleave2.v8f16(<4 x half>, <4 x half>)
declare <4 x float> @llvm.vector.interleave2.v4f32(<2 x float>, <2 x float>)
declare <16 x half> @llvm.vector.interleave2.v16f16(<8 x half>, <8 x half>)
declare <8 x float> @llvm.vector.interleave2.v8f32(<4 x float>, <4 x float>)
declare <4 x double> @llvm.vector.interleave2.v4f64(<2 x double>, <2 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}