; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll (revision 088db868f3370ffe01c9750f75732679efecd1fe)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v \
; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=riscv64 -mattr=+v \
; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefix=CHECK

; Stride-3 deinterleave, phase 0: gathers elements {0,3,6,9,12,15} of the
; loaded v16i8 into a v8i8 (high two result lanes undef) and stores it.
define void @deinterleave3_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave3_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    li a0, 73
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vrgather.vv v10, v8, v9
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-3 deinterleave, phase 1: gathers elements {1,4,7,10,13} of the
; loaded v16i8 into a v8i8 (upper three result lanes undef).
define void @deinterleave3_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave3_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI1_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    li a0, 146
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT:    vrgather.vv v10, v8, v9
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-4 deinterleave, phase 0: gathers elements {0,4,8,12}. Power-of-two
; stride, so codegen uses two vnsrl narrowing steps instead of a gather.
define void @deinterleave4_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave4_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-4 deinterleave, phase 1: gathers elements {1,5,9,13}. Same vnsrl
; lowering as phase 0, but the first narrow shifts by 8 to drop the even bytes.
define void @deinterleave4_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave4_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 8
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-5 deinterleave, phase 0: gathers elements {0,5,10,15}. The two v8i8
; halves are merged under a mask, then gathered with a materialized index
; vector (built from an immediate rather than a constant-pool load).
define void @deinterleave5_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave5_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    lui a0, 28704
; CHECK-NEXT:    addi a0, a0, 1280
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vrgather.vv v10, v8, v9
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 5, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-5 deinterleave, phase 1: gathers elements {1,6,11}. Lowered as a
; vcompress of the low half plus a masked vrgather.vi from the high half.
define void @deinterleave5_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave5_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 66
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 3, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-6 deinterleave, phase 0: gathers elements {0,6,12}. Same
; vcompress + masked vrgather.vi pattern as the stride-5 phase-1 case.
define void @deinterleave6_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 65
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 6, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-6 deinterleave, phase 1: gathers elements {1,7,13} via
; vcompress (low half) plus a masked vrgather.vi from the slid-down high half.
define void @deinterleave6_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave6_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 130
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 5, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 7, i32 13, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-7 deinterleave, phase 0: gathers elements {0,7,14} using the same
; vcompress + masked vrgather.vi lowering as the other large-stride cases.
define void @deinterleave7_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a0, 129
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v10, v8, 6, v0.t
; CHECK-NEXT:    vse8.v v10, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Near-stride-7 gather of elements {1,8,14} (note: not a uniform stride —
; 1->8 is +7 but 8->14 is +6), so codegen builds the high-half indices with
; vid/vmadd instead of a simple compress pattern.
define void @deinterleave7_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, -6
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    li a0, 6
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vmadd.vx v10, a0, v9
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrgather.vi v11, v8, 1
; CHECK-NEXT:    vrgather.vv v11, v9, v10, v0.t
; CHECK-NEXT:    vse8.v v11, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 8, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-8 deinterleave, phase 0: gathers elements {0,8}. Power-of-two
; stride lowered as three successive vnsrl narrowing steps.
define void @deinterleave8_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave8_0_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Stride-8 deinterleave, phase 1: gathers elements {1,9}. Same vnsrl chain
; as phase 0, with the first narrow shifting by 8 to select the odd bytes.
define void @deinterleave8_8_i8(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave8_8_i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Exercise the high lmul case
; Stride-7 deinterleave of a v16i64 (m8 source, m4 result), gathering
; elements {0,7,14}; same compress + masked-gather shape as the i8 version.
define void @deinterleave7_0_i64(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 129
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v16, a0
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vcompress.vm v20, v8, v16
; CHECK-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT:    vrgather.vi v20, v8, 6, v0.t
; CHECK-NEXT:    vse64.v v20, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i64>, ptr %in
  %shuffle.i5 = shufflevector <16 x i64> %0, <16 x i64> poison, <8 x i32> <i32 0, i32 7, i32 14, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i64> %shuffle.i5, ptr %out
  ret void
}

; Store back only the active subvector
; Stride-4 deinterleave with an exact v4i8 result mask {0,4,8,12}; only the
; 4-element subvector is stored, so no undef tail lanes are involved.
define void @deinterleave4_0_i8_subvec(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave4_0_i8_subvec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  store <4 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}

; Store back only the active subvector
; Stride-7 deinterleave of a v16i32 gathering {0,7,14} into a v3i32; the
; store uses VL=3 so only the three active elements are written back.
define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave7_0_i32_subvec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 129
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vcompress.vm v14, v8, v12
; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vrgather.vi v14, v8, 6, v0.t
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v14, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i32>, ptr %in
  %shuffle.i5 = shufflevector <16 x i32> %0, <16 x i32> poison, <3 x i32> <i32 0, i32 7, i32 14>
  store <3 x i32> %shuffle.i5, ptr %out
  ret void
}

; Store back only the active subvector
; Stride-8 deinterleave into a v2i8 {0,8}: only two elements are live, so
; codegen extracts both scalars and rebuilds the pair before the store.
define void @deinterleave8_0_i8_subvec(ptr %in, ptr %out) {
; CHECK-LABEL: deinterleave8_0_i8_subvec:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
entry:
  %0 = load <16 x i8>, ptr %in, align 1
  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <2 x i32> <i32 0, i32 8>
  store <2 x i8> %shuffle.i5, ptr %out, align 1
  ret void
}
