xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll (revision b6c0f1bfa79a3a32d841ac5ab1f94c3aee3b5d90)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefix=VLA %s
3; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefix=VLA %s
4
5; RUN: llc < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefix=VLS %s
6; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefix=VLS %s
7
; Joins %a and %b (<4 x i32> each) into one <8 x i32> using a single
; shufflevector whose mask is the identity sequence 0..7.
; Under the VLA check prefix the expected code copies v9 into v10 and performs
; one vslideup.vi by 4 at e32/m2. Under the VLS check prefix (the llc
; invocations that pass -riscv-v-vector-bits-max=128) only a ret is expected.
; Presumably the operands already occupy the registers that make up the result
; group in that configuration - TODO confirm against the vector calling
; convention.
8define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) {
9; VLA-LABEL: concat_2xv4i32:
10; VLA:       # %bb.0:
11; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
12; VLA-NEXT:    vmv1r.v v10, v9
13; VLA-NEXT:    vslideup.vi v8, v10, 4
14; VLA-NEXT:    ret
15;
16; VLS-LABEL: concat_2xv4i32:
17; VLS:       # %bb.0:
18; VLS-NEXT:    ret
19  %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
20  ret <8 x i32> %ab
21}
22
; Builds an <8 x i32> from four <2 x i32> arguments as a two-level tree of
; identity-mask shufflevectors: (%a,%b) and (%c,%d) are joined first, then the
; two <4 x i32> halves are joined.
; Under the VLA check prefix the expected code is two vslideup.vi by 2 at
; e32/m1 followed by one vslideup.vi by 4 at e32/m2. Under the VLS check prefix
; the two pairwise slide-ups target v12 and v13 and the result is moved into
; place with vmv2r.v; no m2 slide-up is expected there.
23define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
24; VLA-LABEL: concat_4xv2i32:
25; VLA:       # %bb.0:
26; VLA-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
27; VLA-NEXT:    vslideup.vi v10, v11, 2
28; VLA-NEXT:    vslideup.vi v8, v9, 2
29; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
30; VLA-NEXT:    vslideup.vi v8, v10, 4
31; VLA-NEXT:    ret
32;
33; VLS-LABEL: concat_4xv2i32:
34; VLS:       # %bb.0:
35; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
36; VLS-NEXT:    vmv1r.v v13, v10
37; VLS-NEXT:    vmv1r.v v12, v8
38; VLS-NEXT:    vslideup.vi v13, v11, 2
39; VLS-NEXT:    vslideup.vi v12, v9, 2
40; VLS-NEXT:    vmv2r.v v8, v12
41; VLS-NEXT:    ret
42  %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
43  %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
44  %abcd = shufflevector <4 x i32> %ab, <4 x i32> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
45  ret <8 x i32> %abcd
46}
47
; Builds an <8 x i32> from eight <1 x i32> arguments as a three-level tree of
; identity-mask shufflevectors (1+1 -> 2, 2+2 -> 4, 4+4 -> 8 elements).
; Under the VLA check prefix the expected code is four vslideup.vi by 1 at
; e32/mf2, two by 2 at e32/m1 and one by 4 at e32/m2. Under the VLS check
; prefix the slide-ups by 1 and by 2 are interleaved (vsetivli switches between
; mf2 and m1 twice), the halves are assembled in v16 and v17, and vmv2r.v moves
; the pair into v8; no slide-up by 4 is expected there.
48define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x i32> %d, <1 x i32> %e, <1 x i32> %f, <1 x i32> %g, <1 x i32> %h) {
49; VLA-LABEL: concat_8xv1i32:
50; VLA:       # %bb.0:
51; VLA-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
52; VLA-NEXT:    vslideup.vi v14, v15, 1
53; VLA-NEXT:    vslideup.vi v12, v13, 1
54; VLA-NEXT:    vslideup.vi v10, v11, 1
55; VLA-NEXT:    vslideup.vi v8, v9, 1
56; VLA-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
57; VLA-NEXT:    vslideup.vi v12, v14, 2
58; VLA-NEXT:    vslideup.vi v8, v10, 2
59; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
60; VLA-NEXT:    vslideup.vi v8, v12, 4
61; VLA-NEXT:    ret
62;
63; VLS-LABEL: concat_8xv1i32:
64; VLS:       # %bb.0:
65; VLS-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
66; VLS-NEXT:    vmv1r.v v17, v12
67; VLS-NEXT:    vmv1r.v v16, v8
68; VLS-NEXT:    vslideup.vi v14, v15, 1
69; VLS-NEXT:    vslideup.vi v17, v13, 1
70; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
71; VLS-NEXT:    vslideup.vi v17, v14, 2
72; VLS-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
73; VLS-NEXT:    vslideup.vi v10, v11, 1
74; VLS-NEXT:    vslideup.vi v16, v9, 1
75; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
76; VLS-NEXT:    vslideup.vi v16, v10, 2
77; VLS-NEXT:    vmv2r.v v8, v16
78; VLS-NEXT:    ret
79  %ab = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
80  %cd = shufflevector <1 x i32> %c, <1 x i32> %d, <2 x i32> <i32 0, i32 1>
81  %abcd = shufflevector <2 x i32> %ab, <2 x i32> %cd, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
82  %ef = shufflevector <1 x i32> %e, <1 x i32> %f, <2 x i32> <i32 0, i32 1>
83  %gh = shufflevector <1 x i32> %g, <1 x i32> %h, <2 x i32> <i32 0, i32 1>
84  %efgh = shufflevector <2 x i32> %ef, <2 x i32> %gh, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
85  %abcdefgh = shufflevector <4 x i32> %abcd, <4 x i32> %efgh, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
86  ret <8 x i32> %abcdefgh
87}
88
; Joins %a and %b (<8 x i32> each) into one <16 x i32> using a single
; shufflevector whose mask is the identity sequence 0..15.
; Under the VLA check prefix the expected code copies v10 into v12 with
; vmv2r.v and performs one vslideup.vi by 8 at e32/m4. Under the VLS check
; prefix (the llc invocations that pass -riscv-v-vector-bits-max=128) only a
; ret is expected. Presumably the two operands already occupy the registers
; that make up the result group - TODO confirm.
89define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) {
90; VLA-LABEL: concat_2xv8i32:
91; VLA:       # %bb.0:
92; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
93; VLA-NEXT:    vmv2r.v v12, v10
94; VLA-NEXT:    vslideup.vi v8, v12, 8
95; VLA-NEXT:    ret
96;
97; VLS-LABEL: concat_2xv8i32:
98; VLS:       # %bb.0:
99; VLS-NEXT:    ret
100  %v = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
101  ret <16 x i32> %v
102}
103
; Builds a <16 x i32> from four <4 x i32> arguments as a two-level tree of
; identity-mask shufflevectors: (%a,%b) and (%c,%d) are joined first, then the
; two <8 x i32> halves are joined.
; Under the VLA check prefix the expected code is three vmv1r.v copies, two
; vslideup.vi by 4 at e32/m2 and one vslideup.vi by 8 at e32/m4. Under the VLS
; check prefix only a ret is expected. Presumably the four operands already
; occupy the registers that make up the result group - TODO confirm.
104define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
105; VLA-LABEL: concat_4xv4i32:
106; VLA:       # %bb.0:
107; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
108; VLA-NEXT:    vmv1r.v v14, v11
109; VLA-NEXT:    vmv1r.v v12, v10
110; VLA-NEXT:    vmv1r.v v10, v9
111; VLA-NEXT:    vslideup.vi v12, v14, 4
112; VLA-NEXT:    vslideup.vi v8, v10, 4
113; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
114; VLA-NEXT:    vslideup.vi v8, v12, 8
115; VLA-NEXT:    ret
116;
117; VLS-LABEL: concat_4xv4i32:
118; VLS:       # %bb.0:
119; VLS-NEXT:    ret
120  %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
121  %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
122  %abcd = shufflevector <8 x i32> %ab, <8 x i32> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
123  ret <16 x i32> %abcd
124}
125
; Builds a <16 x i32> from eight <2 x i32> arguments as a three-level tree of
; identity-mask shufflevectors (2+2 -> 4, 4+4 -> 8, 8+8 -> 16 elements).
; Under the VLA check prefix the expected code is four vslideup.vi by 2 at
; e32/m1, two by 4 at e32/m2 and one by 8 at e32/m4. Under the VLS check prefix
; only the four pairwise slide-ups by 2 are expected, targeting v16..v19 after
; vmv1r.v copies, followed by a single vmv4r.v into v8; no slide-ups by 4 or 8
; are expected there.
126define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d, <2 x i32> %e, <2 x i32> %f, <2 x i32> %g, <2 x i32> %h) {
127; VLA-LABEL: concat_8xv2i32:
128; VLA:       # %bb.0:
129; VLA-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
130; VLA-NEXT:    vslideup.vi v14, v15, 2
131; VLA-NEXT:    vslideup.vi v12, v13, 2
132; VLA-NEXT:    vslideup.vi v10, v11, 2
133; VLA-NEXT:    vslideup.vi v8, v9, 2
134; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
135; VLA-NEXT:    vslideup.vi v12, v14, 4
136; VLA-NEXT:    vslideup.vi v8, v10, 4
137; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
138; VLA-NEXT:    vslideup.vi v8, v12, 8
139; VLA-NEXT:    ret
140;
141; VLS-LABEL: concat_8xv2i32:
142; VLS:       # %bb.0:
143; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
144; VLS-NEXT:    vmv1r.v v19, v14
145; VLS-NEXT:    vmv1r.v v18, v12
146; VLS-NEXT:    vmv1r.v v17, v10
147; VLS-NEXT:    vmv1r.v v16, v8
148; VLS-NEXT:    vslideup.vi v19, v15, 2
149; VLS-NEXT:    vslideup.vi v18, v13, 2
150; VLS-NEXT:    vslideup.vi v17, v11, 2
151; VLS-NEXT:    vslideup.vi v16, v9, 2
152; VLS-NEXT:    vmv4r.v v8, v16
153; VLS-NEXT:    ret
154  %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
155  %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
156  %abcd = shufflevector <4 x i32> %ab, <4 x i32> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
157  %ef = shufflevector <2 x i32> %e, <2 x i32> %f, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
158  %gh = shufflevector <2 x i32> %g, <2 x i32> %h, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
159  %efgh = shufflevector <4 x i32> %ef, <4 x i32> %gh, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
160  %abcdefgh = shufflevector <8 x i32> %abcd, <8 x i32> %efgh, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
161  ret <16 x i32> %abcdefgh
162}
163
; Joins %a and %b (<16 x i32> each) into one <32 x i32> using a single
; shufflevector whose mask is the identity sequence 0..31.
; Under the VLA check prefix the expected code copies v12 into v16 with
; vmv4r.v, loads 32 into a0, sets the vector length from a0 at e32/m8 and
; performs one vslideup.vi by 16. The length presumably goes through a0 because
; 32 does not fit the immediate form of vsetivli - TODO confirm against the
; vector spec. Under the VLS check prefix only a ret is expected.
164define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) {
165; VLA-LABEL: concat_2xv16i32:
166; VLA:       # %bb.0:
167; VLA-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
168; VLA-NEXT:    vmv4r.v v16, v12
169; VLA-NEXT:    li a0, 32
170; VLA-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
171; VLA-NEXT:    vslideup.vi v8, v16, 16
172; VLA-NEXT:    ret
173;
174; VLS-LABEL: concat_2xv16i32:
175; VLS:       # %bb.0:
176; VLS-NEXT:    ret
177  %ab = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
178  ret <32 x i32> %ab
179}
180
; Builds a <32 x i32> from four <8 x i32> arguments as a two-level tree of
; identity-mask shufflevectors: (%a,%b) and (%c,%d) are joined first, then the
; two <16 x i32> halves are joined.
; Under the VLA check prefix the expected code is three vmv2r.v copies, two
; vslideup.vi by 8 at e32/m4, and one vslideup.vi by 16 at e32/m8 with the
; vector length taken from a0 (loaded with 32). Under the VLS check prefix only
; a ret is expected.
181define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
182; VLA-LABEL: concat_4xv8i32:
183; VLA:       # %bb.0:
184; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
185; VLA-NEXT:    vmv2r.v v20, v14
186; VLA-NEXT:    vmv2r.v v16, v12
187; VLA-NEXT:    vmv2r.v v12, v10
188; VLA-NEXT:    li a0, 32
189; VLA-NEXT:    vslideup.vi v16, v20, 8
190; VLA-NEXT:    vslideup.vi v8, v12, 8
191; VLA-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
192; VLA-NEXT:    vslideup.vi v8, v16, 16
193; VLA-NEXT:    ret
194;
195; VLS-LABEL: concat_4xv8i32:
196; VLS:       # %bb.0:
197; VLS-NEXT:    ret
198  %ab = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
199  %cd = shufflevector <8 x i32> %c, <8 x i32> %d, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
200  %abcd = shufflevector <16 x i32> %ab, <16 x i32> %cd, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
201  ret <32 x i32> %abcd
202}
203
; Builds a <32 x i32> from eight <4 x i32> arguments as a three-level tree of
; identity-mask shufflevectors (4+4 -> 8, 8+8 -> 16, 16+16 -> 32 elements).
; Under the VLA check prefix the expected code is seven vmv1r.v copies, four
; vslideup.vi by 4 at e32/m2, two by 8 at e32/m4, and one by 16 at e32/m8 with
; the vector length taken from a0 (loaded with 32). Under the VLS check prefix
; only a ret is expected. Presumably the eight operands already occupy the
; registers that make up the result group - TODO confirm.
204define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) {
205; VLA-LABEL: concat_8xv4i32:
206; VLA:       # %bb.0:
207; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
208; VLA-NEXT:    vmv1r.v v18, v15
209; VLA-NEXT:    vmv1r.v v20, v14
210; VLA-NEXT:    vmv1r.v v14, v13
211; VLA-NEXT:    vmv1r.v v16, v12
212; VLA-NEXT:    vmv1r.v v22, v11
213; VLA-NEXT:    vmv1r.v v12, v10
214; VLA-NEXT:    vmv1r.v v10, v9
215; VLA-NEXT:    li a0, 32
216; VLA-NEXT:    vslideup.vi v20, v18, 4
217; VLA-NEXT:    vslideup.vi v16, v14, 4
218; VLA-NEXT:    vslideup.vi v12, v22, 4
219; VLA-NEXT:    vslideup.vi v8, v10, 4
220; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
221; VLA-NEXT:    vslideup.vi v16, v20, 8
222; VLA-NEXT:    vslideup.vi v8, v12, 8
223; VLA-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
224; VLA-NEXT:    vslideup.vi v8, v16, 16
225; VLA-NEXT:    ret
226;
227; VLS-LABEL: concat_8xv4i32:
228; VLS:       # %bb.0:
229; VLS-NEXT:    ret
230  %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
231  %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
232  %abcd = shufflevector <8 x i32> %ab, <8 x i32> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
233  %ef = shufflevector <4 x i32> %e, <4 x i32> %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
234  %gh = shufflevector <4 x i32> %g, <4 x i32> %h, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
235  %efgh = shufflevector <8 x i32> %ef, <8 x i32> %gh, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
236  %abcdefgh = shufflevector <16 x i32> %abcd, <16 x i32> %efgh, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
237  ret <32 x i32> %abcdefgh
238}
239