xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll (revision 1cb599835ccf7ee8b2d1d5a7f3107e19a26fc6f5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4
; Module-wide data layout string shared by every function in this file.
; NOTE(review): the RUN lines pass an explicit -mtriple (riscv32 and riscv64)
; while this single string declares native widths n32:64 - confirm the layout
; is intended for both targets.
5target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6
; trn1 on <8 x i8>: the mask selects even lanes, alternating sources:
; <v0[0], v1[0], v0[2], v1[2], v0[4], v1[4], v0[6], v1[6]>.
; Expected lowering: vslideup.vi by 1 under mask 170 (0b10101010).
7define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
8; CHECK-LABEL: trn1.v8i8:
9; CHECK:       # %bb.0:
10; CHECK-NEXT:    li a0, 170
11; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
12; CHECK-NEXT:    vmv.s.x v0, a0
13; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
14; CHECK-NEXT:    ret
15  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
16  ret <8 x i8> %tmp0
17}
18
; trn2 on <8 x i8>: the mask selects odd lanes, alternating sources:
; <v0[1], v1[1], v0[3], v1[3], v0[5], v1[5], v0[7], v1[7]>.
; Expected lowering: vslidedown.vi by 1 under mask 85 (0b01010101) into v9,
; followed by a whole-register copy (vmv1r.v) back to v8.
19define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
20; CHECK-LABEL: trn2.v8i8:
21; CHECK:       # %bb.0:
22; CHECK-NEXT:    li a0, 85
23; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
24; CHECK-NEXT:    vmv.s.x v0, a0
25; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
26; CHECK-NEXT:    vmv1r.v v8, v9
27; CHECK-NEXT:    ret
28  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
29  ret <8 x i8> %tmp0
30}
31
; trn1 on <16 x i8>: even lanes of %v0 and %v1 interleaved
; (<v0[0], v1[0], v0[2], v1[2], ...>).
; The 16-bit mask does not fit an li immediate here: lui 11 + addi -1366
; yields 43690 (0xAAAA), written to v0 under a separate e16 vsetivli before
; the masked vslideup.vi by 1.
32define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
33; CHECK-LABEL: trn1.v16i8:
34; CHECK:       # %bb.0:
35; CHECK-NEXT:    lui a0, 11
36; CHECK-NEXT:    addi a0, a0, -1366
37; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
38; CHECK-NEXT:    vmv.s.x v0, a0
39; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
40; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
41; CHECK-NEXT:    ret
42  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
43  ret <16 x i8> %tmp0
44}
45
; trn2 on <16 x i8>: odd lanes of %v0 and %v1 interleaved
; (<v0[1], v1[1], v0[3], v1[3], ...>).
; Mask is lui 5 + addi 1365 = 21845 (0x5555), written to v0 under a separate
; e16 vsetivli; the masked vslidedown.vi by 1 targets v9, which is then copied
; to v8 with vmv.v.v.
46define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
47; CHECK-LABEL: trn2.v16i8:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    lui a0, 5
50; CHECK-NEXT:    addi a0, a0, 1365
51; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
52; CHECK-NEXT:    vmv.s.x v0, a0
53; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
54; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
55; CHECK-NEXT:    vmv.v.v v8, v9
56; CHECK-NEXT:    ret
57  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
58  ret <16 x i8> %tmp0
59}
60
; trn1 on <4 x i16>: result is <v0[0], v1[0], v0[2], v1[2]>.
; Expected lowering: mask 10 (0b1010) materialized with vmv.v.i, then a
; masked vslideup.vi by 1.
61define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
62; CHECK-LABEL: trn1.v4i16:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
65; CHECK-NEXT:    vmv.v.i v0, 10
66; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
67; CHECK-NEXT:    ret
68  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
69  ret <4 x i16> %tmp0
70}
; trn2 on <4 x i16>: result is <v0[1], v1[1], v0[3], v1[3]>.
; Expected lowering: mask 5 (0b0101) via vmv.v.i, masked vslidedown.vi by 1
; into v9, then vmv1r.v copies v9 to v8.
72define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
73; CHECK-LABEL: trn2.v4i16:
74; CHECK:       # %bb.0:
75; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
76; CHECK-NEXT:    vmv.v.i v0, 5
77; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
78; CHECK-NEXT:    vmv1r.v v8, v9
79; CHECK-NEXT:    ret
80  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
81  ret <4 x i16> %tmp0
82}
83
; trn1 on <8 x i16>: even lanes of %v0 and %v1 interleaved
; (<v0[0], v1[0], v0[2], v1[2], ...>).
; Expected lowering: vslideup.vi by 1 under mask 170 (0b10101010).
84define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
85; CHECK-LABEL: trn1.v8i16:
86; CHECK:       # %bb.0:
87; CHECK-NEXT:    li a0, 170
88; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
89; CHECK-NEXT:    vmv.s.x v0, a0
90; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
91; CHECK-NEXT:    ret
92  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
93  ret <8 x i16> %tmp0
94}
95
; trn2 on <8 x i16>: odd lanes of %v0 and %v1 interleaved
; (<v0[1], v1[1], v0[3], v1[3], ...>).
; Expected lowering: vslidedown.vi by 1 under mask 85 (0b01010101) into v9,
; then vmv.v.v copies v9 to v8.
96define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
97; CHECK-LABEL: trn2.v8i16:
98; CHECK:       # %bb.0:
99; CHECK-NEXT:    li a0, 85
100; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
101; CHECK-NEXT:    vmv.s.x v0, a0
102; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
103; CHECK-NEXT:    vmv.v.v v8, v9
104; CHECK-NEXT:    ret
105  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
106  ret <8 x i16> %tmp0
107}
108
; trn1 on <2 x i32>: result is <v0[0], v1[0]>.
; With only two lanes no mask is needed: the CHECK lines expect a single
; unmasked vslideup.vi by 1.
109define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
110; CHECK-LABEL: trn1.v2i32:
111; CHECK:       # %bb.0:
112; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
113; CHECK-NEXT:    vslideup.vi v8, v9, 1
114; CHECK-NEXT:    ret
115  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
116  ret <2 x i32> %tmp0
117}
118
; trn2 on <2 x i32>: result is <v0[1], v1[1]>.
; Expected lowering differs from the wider cases: a vrgather.vi of index 1
; under mask 1 (lane 0 only) into v9, then vmv1r.v copies v9 to v8.
119define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
120; CHECK-LABEL: trn2.v2i32:
121; CHECK:       # %bb.0:
122; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
123; CHECK-NEXT:    vmv.v.i v0, 1
124; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
125; CHECK-NEXT:    vmv1r.v v8, v9
126; CHECK-NEXT:    ret
127  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
128  ret <2 x i32> %tmp0
129}
130
; trn1 on <4 x i32>: result is <v0[0], v1[0], v0[2], v1[2]>.
; Expected lowering: mask 10 (0b1010) via vmv.v.i, then a masked
; vslideup.vi by 1.
131define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
132; CHECK-LABEL: trn1.v4i32:
133; CHECK:       # %bb.0:
134; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
135; CHECK-NEXT:    vmv.v.i v0, 10
136; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
137; CHECK-NEXT:    ret
138  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
139  ret <4 x i32> %tmp0
140}
141
; trn2 on <4 x i32>: result is <v0[1], v1[1], v0[3], v1[3]>.
; Expected lowering: mask 5 (0b0101) via vmv.v.i, masked vslidedown.vi by 1
; into v9, then vmv.v.v copies v9 to v8.
142define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
143; CHECK-LABEL: trn2.v4i32:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
146; CHECK-NEXT:    vmv.v.i v0, 5
147; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
148; CHECK-NEXT:    vmv.v.v v8, v9
149; CHECK-NEXT:    ret
150  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
151  ret <4 x i32> %tmp0
152}
153
; trn1 on <2 x i64>: result is <v0[0], v1[0]>.
; Expected lowering: a single unmasked vslideup.vi by 1 at e64/m1.
154define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
155; CHECK-LABEL: trn1.v2i64:
156; CHECK:       # %bb.0:
157; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
158; CHECK-NEXT:    vslideup.vi v8, v9, 1
159; CHECK-NEXT:    ret
160  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
161  ret <2 x i64> %tmp0
162}
163
; trn2 on <2 x i64>: result is <v0[1], v1[1]>.
; Expected lowering: vrgather.vi of index 1 under mask 1 (lane 0 only) into
; v9, then vmv.v.v copies v9 to v8.
164define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
165; CHECK-LABEL: trn2.v2i64:
166; CHECK:       # %bb.0:
167; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
168; CHECK-NEXT:    vmv.v.i v0, 1
169; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
170; CHECK-NEXT:    vmv.v.v v8, v9
171; CHECK-NEXT:    ret
172  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
173  ret <2 x i64> %tmp0
174}
175
; trn1 on <2 x float>: result is <v0[0], v1[0]>.
; Expected lowering matches the <2 x i32> case: one unmasked vslideup.vi by 1.
176define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
177; CHECK-LABEL: trn1.v2f32:
178; CHECK:       # %bb.0:
179; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
180; CHECK-NEXT:    vslideup.vi v8, v9, 1
181; CHECK-NEXT:    ret
182  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
183  ret <2 x float> %tmp0
184}
185
; trn2 on <2 x float>: result is <v0[1], v1[1]>.
; Expected lowering matches the <2 x i32> case: vrgather.vi of index 1 under
; mask 1 into v9, then vmv1r.v copies v9 to v8.
186define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
187; CHECK-LABEL: trn2.v2f32:
188; CHECK:       # %bb.0:
189; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
190; CHECK-NEXT:    vmv.v.i v0, 1
191; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
192; CHECK-NEXT:    vmv1r.v v8, v9
193; CHECK-NEXT:    ret
194  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
195  ret <2 x float> %tmp0
196}
197
; trn1 on <4 x float>: result is <v0[0], v1[0], v0[2], v1[2]>.
; Expected lowering: mask 10 (0b1010) via vmv.v.i, then a masked
; vslideup.vi by 1.
198define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
199; CHECK-LABEL: trn1.v4f32:
200; CHECK:       # %bb.0:
201; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
202; CHECK-NEXT:    vmv.v.i v0, 10
203; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
204; CHECK-NEXT:    ret
205  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
206  ret <4 x float> %tmp0
207}
208
; trn2 on <4 x float>: result is <v0[1], v1[1], v0[3], v1[3]>.
; Expected lowering: mask 5 (0b0101) via vmv.v.i, masked vslidedown.vi by 1
; into v9, then vmv.v.v copies v9 to v8.
209define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
210; CHECK-LABEL: trn2.v4f32:
211; CHECK:       # %bb.0:
212; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
213; CHECK-NEXT:    vmv.v.i v0, 5
214; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
215; CHECK-NEXT:    vmv.v.v v8, v9
216; CHECK-NEXT:    ret
217  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
218  ret <4 x float> %tmp0
219}
220
; trn1 on <2 x double>: result is <v0[0], v1[0]>.
; Expected lowering: a single unmasked vslideup.vi by 1 at e64/m1.
221define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
222; CHECK-LABEL: trn1.v2f64:
223; CHECK:       # %bb.0:
224; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
225; CHECK-NEXT:    vslideup.vi v8, v9, 1
226; CHECK-NEXT:    ret
227  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
228  ret <2 x double> %tmp0
229}
230
; trn2 on <2 x double>: result is <v0[1], v1[1]>.
; Expected lowering: vrgather.vi of index 1 under mask 1 (lane 0 only) into
; v9, then vmv.v.v copies v9 to v8.
231define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
232; CHECK-LABEL: trn2.v2f64:
233; CHECK:       # %bb.0:
234; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
235; CHECK-NEXT:    vmv.v.i v0, 1
236; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
237; CHECK-NEXT:    vmv.v.v v8, v9
238; CHECK-NEXT:    ret
239  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
240  ret <2 x double> %tmp0
241}
242
; trn1 on <4 x half>: result is <v0[0], v1[0], v0[2], v1[2]>.
; Expected lowering matches the <4 x i16> case: mask 10 (0b1010) via vmv.v.i,
; then a masked vslideup.vi by 1.
243define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
244; CHECK-LABEL: trn1.v4f16:
245; CHECK:       # %bb.0:
246; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
247; CHECK-NEXT:    vmv.v.i v0, 10
248; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
249; CHECK-NEXT:    ret
250  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
251  ret <4 x half> %tmp0
252}
253
; trn2 on <4 x half>: result is <v0[1], v1[1], v0[3], v1[3]>.
; Expected lowering matches the <4 x i16> case: mask 5 (0b0101) via vmv.v.i,
; masked vslidedown.vi by 1 into v9, then vmv1r.v copies v9 to v8.
254define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
255; CHECK-LABEL: trn2.v4f16:
256; CHECK:       # %bb.0:
257; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
258; CHECK-NEXT:    vmv.v.i v0, 5
259; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
260; CHECK-NEXT:    vmv1r.v v8, v9
261; CHECK-NEXT:    ret
262  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
263  ret <4 x half> %tmp0
264}
265
; trn1 on <8 x half>: even lanes of %v0 and %v1 interleaved
; (<v0[0], v1[0], v0[2], v1[2], ...>).
; Expected lowering matches the <8 x i16> case: vslideup.vi by 1 under
; mask 170 (0b10101010).
266define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
267; CHECK-LABEL: trn1.v8f16:
268; CHECK:       # %bb.0:
269; CHECK-NEXT:    li a0, 170
270; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
271; CHECK-NEXT:    vmv.s.x v0, a0
272; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
273; CHECK-NEXT:    ret
274  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
275  ret <8 x half> %tmp0
276}
277
; trn2 on <8 x half>: odd lanes of %v0 and %v1 interleaved
; (<v0[1], v1[1], v0[3], v1[3], ...>).
; Expected lowering matches the <8 x i16> case: vslidedown.vi by 1 under
; mask 85 (0b01010101) into v9, then vmv.v.v copies v9 to v8.
278define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
279; CHECK-LABEL: trn2.v8f16:
280; CHECK:       # %bb.0:
281; CHECK-NEXT:    li a0, 85
282; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
283; CHECK-NEXT:    vmv.s.x v0, a0
284; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
285; CHECK-NEXT:    vmv.v.v v8, v9
286; CHECK-NEXT:    ret
287  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
288  ret <8 x half> %tmp0
289}
290;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
291; RV32: {{.*}}
292; RV64: {{.*}}
293