xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll (revision 143c33c6dfd68f4e61d8e75c512bfdff02a7c687)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4
; Select-style shuffles: every result lane comes from the same lane of one
; of the two inputs, so the lowering is a vector merge under a constant
; mask rather than a gather.

; Lane 2 comes from %y, the rest from %x (mask 0b1011 = 11 fits vmv.v.i).
5define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
6; CHECK-LABEL: shuffle_v4i16:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
9; CHECK-NEXT:    vmv.v.i v0, 11
10; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
11; CHECK-NEXT:    ret
12  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
13  ret <4 x i16> %s
14}
15
; Same idea with 8 lanes; mask 0b11001011 = 203 exceeds the 5-bit
; immediate range, so it is materialized through a scalar register.
16define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
17; CHECK-LABEL: shuffle_v8i32:
18; CHECK:       # %bb.0:
19; CHECK-NEXT:    li a0, 203
20; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
21; CHECK-NEXT:    vmv.s.x v0, a0
22; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
23; CHECK-NEXT:    ret
24  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
25  ret <8 x i32> %s
26}
27
; One shuffle input is a constant splat of 5, so the merge can use the
; immediate form (vmerge.vim) instead of a second vector operand.
28define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
29; CHECK-LABEL: shuffle_xv_v4i16:
30; CHECK:       # %bb.0:
31; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
32; CHECK-NEXT:    vmv.v.i v0, 9
33; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
34; CHECK-NEXT:    ret
35  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
36  ret <4 x i16> %s
37}
38
; As above but with the splat as the second shuffle operand; only the
; merge mask constant changes.
39define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
40; CHECK-LABEL: shuffle_vx_v4i16:
41; CHECK:       # %bb.0:
42; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
43; CHECK-NEXT:    vmv.v.i v0, 6
44; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
45; CHECK-NEXT:    ret
46  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
47  ret <4 x i16> %s
48}
49
; Single-source permute needing a real gather: the e16 index vector is
; built by splatting a 32-bit constant and sign-extending it.
50define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
51; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
52; CHECK:       # %bb.0:
53; CHECK-NEXT:    lui a0, 4096
54; CHECK-NEXT:    addi a0, a0, 513
55; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
56; CHECK-NEXT:    vmv.s.x v9, a0
57; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
58; CHECK-NEXT:    vsext.vf2 v10, v9
59; CHECK-NEXT:    vrgather.vv v9, v8, v10
60; CHECK-NEXT:    vmv1r.v v8, v9
61; CHECK-NEXT:    ret
62  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
63  ret <4 x i16> %s
64}
65
; Mirror of the previous test with the live operand second; indices
; 5,6,4,5 reduce to the same single-source permute and identical codegen.
66define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
67; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
68; CHECK:       # %bb.0:
69; CHECK-NEXT:    lui a0, 4096
70; CHECK-NEXT:    addi a0, a0, 513
71; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
72; CHECK-NEXT:    vmv.s.x v9, a0
73; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
74; CHECK-NEXT:    vsext.vf2 v10, v9
75; CHECK-NEXT:    vrgather.vv v9, v8, v10
76; CHECK-NEXT:    vmv1r.v v8, v9
77; CHECK-NEXT:    ret
78  %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
79  ret <4 x i16> %s
80}
81
; Two-source shuffle: gather from %x with constant-pool indices, then
; overlay lane 3 from %y with a masked vrgather.vi.
82define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
83; CHECK-LABEL: vrgather_shuffle_vv_v4i16:
84; CHECK:       # %bb.0:
85; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
86; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
87; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
88; CHECK-NEXT:    vle16.v v11, (a0)
89; CHECK-NEXT:    vmv.v.i v0, 8
90; CHECK-NEXT:    vrgather.vv v10, v8, v11
91; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
92; CHECK-NEXT:    vmv1r.v v8, v10
93; CHECK-NEXT:    ret
94  %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
95  ret <4 x i16> %s
96}
97
; Constant-splat (5) background with the upper lanes gathered from %x via
; a vid/vrsub-derived index vector under mask 0b1100.
98define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) {
99; CHECK-LABEL: vrgather_shuffle_xv_v4i16:
100; CHECK:       # %bb.0:
101; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
102; CHECK-NEXT:    vid.v v9
103; CHECK-NEXT:    vmv.v.i v0, 12
104; CHECK-NEXT:    vrsub.vi v10, v9, 4
105; CHECK-NEXT:    vmv.v.i v9, 5
106; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
107; CHECK-NEXT:    vmv1r.v v8, v9
108; CHECK-NEXT:    ret
109  %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
110  ret <4 x i16> %s
111}
112
; Here the wanted %x lanes are selected with vcompress and then merged in
; front of the splat-of-5 background.
113define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) {
114; CHECK-LABEL: vrgather_shuffle_vx_v4i16:
115; CHECK:       # %bb.0:
116; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
117; CHECK-NEXT:    vmv.v.i v9, 9
118; CHECK-NEXT:    vmv.v.i v0, 3
119; CHECK-NEXT:    vcompress.vm v10, v8, v9
120; CHECK-NEXT:    vmv.v.i v8, 5
121; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
122; CHECK-NEXT:    ret
123  %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
124  ret <4 x i16> %s
125}
126
; 64-bit element permutes use vrgatherei16.vv so the index vector can stay
; at e16 while gathering e64 data.
127define <8 x i64> @vrgather_permute_shuffle_vu_v8i64(<8 x i64> %x) {
128; CHECK-LABEL: vrgather_permute_shuffle_vu_v8i64:
129; CHECK:       # %bb.0:
130; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
131; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
132; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
133; CHECK-NEXT:    vle16.v v16, (a0)
134; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
135; CHECK-NEXT:    vmv.v.v v8, v12
136; CHECK-NEXT:    ret
137  %s = shufflevector <8 x i64> %x, <8 x i64> poison, <8 x i32> <i32 1, i32 2, i32 0, i32 1, i32 7, i32 6, i32 0, i32 1>
138  ret <8 x i64> %s
139}
140
; Same single-source e64 gather with the live operand second.
141define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) {
142; CHECK-LABEL: vrgather_permute_shuffle_uv_v8i64:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
145; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
146; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
147; CHECK-NEXT:    vle16.v v16, (a0)
148; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
149; CHECK-NEXT:    vmv.v.v v8, v12
150; CHECK-NEXT:    ret
151  %s = shufflevector <8 x i64> poison, <8 x i64> %x, <8 x i32> <i32 9, i32 10, i32 8, i32 9, i32 15, i32 8, i32 8, i32 11>
152  ret <8 x i64> %s
153}
154
; Two-source e64 gather; the RV32 path loads its index vectors from the
; constant pool while the RV64 path builds them with scalar immediates.
155define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
156; RV32-LABEL: vrgather_shuffle_vv_v8i64:
157; RV32:       # %bb.0:
158; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
159; RV32-NEXT:    addi a0, a0, %lo(.LCPI11_0)
160; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
161; RV32-NEXT:    vle16.v v20, (a0)
162; RV32-NEXT:    vmv.v.i v21, 2
163; RV32-NEXT:    li a0, 164
164; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
165; RV32-NEXT:    vrgatherei16.vv v16, v8, v20
166; RV32-NEXT:    vmv.s.x v0, a0
167; RV32-NEXT:    li a0, 5
168; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
169; RV32-NEXT:    vslide1down.vx v8, v21, a0
170; RV32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
171; RV32-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
172; RV32-NEXT:    vmv.v.v v8, v16
173; RV32-NEXT:    ret
174;
175; RV64-LABEL: vrgather_shuffle_vv_v8i64:
176; RV64:       # %bb.0:
177; RV64-NEXT:    li a0, 164
178; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
179; RV64-NEXT:    vmv.s.x v0, a0
180; RV64-NEXT:    lui a0, 327683
181; RV64-NEXT:    slli a0, a0, 3
182; RV64-NEXT:    addi a0, a0, 1
183; RV64-NEXT:    slli a0, a0, 17
184; RV64-NEXT:    addi a0, a0, 1
185; RV64-NEXT:    vmv.v.x v20, a0
186; RV64-NEXT:    lui a0, 163841
187; RV64-NEXT:    slli a0, a0, 4
188; RV64-NEXT:    addi a0, a0, 1
189; RV64-NEXT:    slli a0, a0, 17
190; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
191; RV64-NEXT:    vrgatherei16.vv v16, v8, v20
192; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
193; RV64-NEXT:    vmv.v.x v8, a0
194; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
195; RV64-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
196; RV64-NEXT:    vmv.v.v v8, v16
197; RV64-NEXT:    ret
198  %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> <i32 1, i32 2, i32 10, i32 5, i32 1, i32 10, i32 3, i32 13>
199  ret <8 x i64> %s
200}
201
; First operand is an all-ones splat: the -1 background is materialized
; directly and only the %x lanes (mask 113) are gathered in.
202define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
203; RV32-LABEL: vrgather_shuffle_xv_v8i64:
204; RV32:       # %bb.0:
205; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
206; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_0)
207; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
208; RV32-NEXT:    vmv.v.i v16, -1
209; RV32-NEXT:    vle16.v v20, (a0)
210; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
211; RV32-NEXT:    addi a0, a0, %lo(.LCPI12_1)
212; RV32-NEXT:    vle16.v v21, (a0)
213; RV32-NEXT:    li a0, 113
214; RV32-NEXT:    vmv.s.x v0, a0
215; RV32-NEXT:    vrgatherei16.vv v12, v16, v20
216; RV32-NEXT:    vrgatherei16.vv v12, v8, v21, v0.t
217; RV32-NEXT:    vmv.v.v v8, v12
218; RV32-NEXT:    ret
219;
220; RV64-LABEL: vrgather_shuffle_xv_v8i64:
221; RV64:       # %bb.0:
222; RV64-NEXT:    li a0, 113
223; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
224; RV64-NEXT:    vmv.s.x v0, a0
225; RV64-NEXT:    lui a0, 98305
226; RV64-NEXT:    slli a0, a0, 6
227; RV64-NEXT:    vmv.v.x v16, a0
228; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
229; RV64-NEXT:    vmv.v.i v12, -1
230; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
231; RV64-NEXT:    vmv.v.v v8, v12
232; RV64-NEXT:    ret
233  %s = shufflevector <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x, <8 x i32> <i32 8, i32 3, i32 6, i32 5, i32 8, i32 12, i32 14, i32 3>
234  ret <8 x i64> %s
235}
236
; Second operand is a splat of 5; the splat lanes are overlaid on the
; gathered %x lanes with a masked second gather.
237define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
238; RV32-LABEL: vrgather_shuffle_vx_v8i64:
239; RV32:       # %bb.0:
240; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
241; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_0)
242; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
243; RV32-NEXT:    vle16.v v16, (a0)
244; RV32-NEXT:    lui a0, %hi(.LCPI13_1)
245; RV32-NEXT:    addi a0, a0, %lo(.LCPI13_1)
246; RV32-NEXT:    vle16.v v17, (a0)
247; RV32-NEXT:    li a0, 140
248; RV32-NEXT:    vmv.s.x v0, a0
249; RV32-NEXT:    vrgatherei16.vv v12, v8, v16
250; RV32-NEXT:    vmv.v.i v8, 5
251; RV32-NEXT:    vrgatherei16.vv v12, v8, v17, v0.t
252; RV32-NEXT:    vmv.v.v v8, v12
253; RV32-NEXT:    ret
254;
255; RV64-LABEL: vrgather_shuffle_vx_v8i64:
256; RV64:       # %bb.0:
257; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
258; RV64-NEXT:    addi a0, a0, %lo(.LCPI13_0)
259; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
260; RV64-NEXT:    vle16.v v16, (a0)
261; RV64-NEXT:    li a0, 115
262; RV64-NEXT:    vmv.s.x v0, a0
263; RV64-NEXT:    vmv.v.i v12, 5
264; RV64-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
265; RV64-NEXT:    vmv.v.v v8, v12
266; RV64-NEXT:    ret
267  %s = shufflevector <8 x i64> %x, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i32> <i32 0, i32 3, i32 10, i32 9, i32 4, i32 1, i32 7, i32 14>
268  ret <8 x i64> %s
269}
270
; Contiguous extracts of a shifted window become a single vslidedown.
271define <4 x i16> @shuffle_v8i16_to_vslidedown_1(<8 x i16> %x) {
272; CHECK-LABEL: shuffle_v8i16_to_vslidedown_1:
273; CHECK:       # %bb.0: # %entry
274; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
275; CHECK-NEXT:    vslidedown.vi v8, v8, 1
276; CHECK-NEXT:    ret
277entry:
278  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
279  ret <4 x i16> %s
280}
281
282define <4 x i16> @shuffle_v8i16_to_vslidedown_3(<8 x i16> %x) {
283; CHECK-LABEL: shuffle_v8i16_to_vslidedown_3:
284; CHECK:       # %bb.0: # %entry
285; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
286; CHECK-NEXT:    vslidedown.vi v8, v8, 3
287; CHECK-NEXT:    ret
288entry:
289  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
290  ret <4 x i16> %s
291}
292
293define <2 x i32> @shuffle_v4i32_to_vslidedown(<4 x i32> %x) {
294; CHECK-LABEL: shuffle_v4i32_to_vslidedown:
295; CHECK:       # %bb.0: # %entry
296; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
297; CHECK-NEXT:    vslidedown.vi v8, v8, 1
298; CHECK-NEXT:    ret
299entry:
300  %s = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
301  ret <2 x i32> %s
302}
303
; Interleave of two element splats, lowered via the widening add/multiply-
; accumulate interleave sequence rather than gathers.
304define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
305; CHECK-LABEL: interleave_shuffles:
306; CHECK:       # %bb.0:
307; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
308; CHECK-NEXT:    vrgather.vi v9, v8, 0
309; CHECK-NEXT:    vrgather.vi v10, v8, 1
310; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
311; CHECK-NEXT:    vwaddu.vv v8, v9, v10
312; CHECK-NEXT:    li a0, -1
313; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
314; CHECK-NEXT:    ret
315  %y = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
316  %z = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
317  %w = shufflevector <4 x i8> %y, <4 x i8> %z, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
318  ret <4 x i8> %w
319}
320
; Element-splat shuffles, optionally with one lane patched to a different
; source index: a pure splat is one vrgather.vi; the patched variants
; build an index vector and fix it up with a scalar insert or a slideup.
321define <8 x i8> @splat_ve4(<8 x i8> %v) {
322; CHECK-LABEL: splat_ve4:
323; CHECK:       # %bb.0:
324; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
325; CHECK-NEXT:    vrgather.vi v9, v8, 4
326; CHECK-NEXT:    vmv1r.v v8, v9
327; CHECK-NEXT:    ret
328  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
329  ret <8 x i8> %shuff
330}
331
; Index 0 patched to element 2: splat index 4, then a tail-undisturbed
; vmv.s.x overwrites lane 0 of the index vector.
332define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) {
333; CHECK-LABEL: splat_ve4_ins_i0ve2:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
336; CHECK-NEXT:    vmv.v.i v10, 4
337; CHECK-NEXT:    li a0, 2
338; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
339; CHECK-NEXT:    vmv.s.x v10, a0
340; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
341; CHECK-NEXT:    vrgather.vv v9, v8, v10
342; CHECK-NEXT:    vmv1r.v v8, v9
343; CHECK-NEXT:    ret
344  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
345  ret <8 x i8> %shuff
346}
347
; Index 1 patched to element 3: the odd index is slid into position with a
; short tail-undisturbed vslideup.
348define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) {
349; CHECK-LABEL: splat_ve4_ins_i1ve3:
350; CHECK:       # %bb.0:
351; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, ta, ma
352; CHECK-NEXT:    vmv.v.i v9, 3
353; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
354; CHECK-NEXT:    vmv.v.i v10, 4
355; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
356; CHECK-NEXT:    vslideup.vi v10, v9, 1
357; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
358; CHECK-NEXT:    vrgather.vv v9, v8, v10
359; CHECK-NEXT:    vmv1r.v v8, v9
360; CHECK-NEXT:    ret
361  %shuff = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
362  ret <8 x i8> %shuff
363}
364
; Two-source splat shuffles: splat element 2 of %v, then pull element 0 of
; %w into the selected lanes with a masked gather; the _ins variants also
; patch one lane's index first.
365define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
366; CHECK-LABEL: splat_ve2_we0:
367; CHECK:       # %bb.0:
368; CHECK-NEXT:    li a0, 66
369; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
370; CHECK-NEXT:    vmv.s.x v0, a0
371; CHECK-NEXT:    vrgather.vi v10, v8, 2
372; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
373; CHECK-NEXT:    vmv1r.v v8, v10
374; CHECK-NEXT:    ret
375  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
376  ret <8 x i8> %shuff
377}
378
379define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
380; CHECK-LABEL: splat_ve2_we0_ins_i0ve4:
381; CHECK:       # %bb.0:
382; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
383; CHECK-NEXT:    vmv.v.i v11, 2
384; CHECK-NEXT:    li a0, 4
385; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
386; CHECK-NEXT:    vmv.s.x v11, a0
387; CHECK-NEXT:    li a0, 66
388; CHECK-NEXT:    vmv.s.x v0, a0
389; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
390; CHECK-NEXT:    vrgather.vv v10, v8, v11
391; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
392; CHECK-NEXT:    vmv1r.v v8, v10
393; CHECK-NEXT:    ret
394  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 4, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
395  ret <8 x i8> %shuff
396}
397
398define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
399; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
400; CHECK:       # %bb.0:
401; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
402; CHECK-NEXT:    vrgather.vi v10, v8, 2
403; CHECK-NEXT:    li a0, 67
404; CHECK-NEXT:    vmv.s.x v0, a0
405; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
406; CHECK-NEXT:    vmv.v.i v8, 4
407; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
408; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
409; CHECK-NEXT:    vmv1r.v v8, v10
410; CHECK-NEXT:    ret
411  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 12, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
412  ret <8 x i8> %shuff
413}
414
; The patched index vector here is built as a splatted 32-bit constant
; (lui/addi) reinterpreted as bytes.
415define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
416; CHECK-LABEL: splat_ve2_we0_ins_i2ve4:
417; CHECK:       # %bb.0:
418; CHECK-NEXT:    lui a0, 8256
419; CHECK-NEXT:    addi a0, a0, 514
420; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
421; CHECK-NEXT:    vmv.v.x v11, a0
422; CHECK-NEXT:    li a0, 66
423; CHECK-NEXT:    vmv.s.x v0, a0
424; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
425; CHECK-NEXT:    vrgather.vv v10, v8, v11
426; CHECK-NEXT:    vrgather.vi v10, v9, 0, v0.t
427; CHECK-NEXT:    vmv1r.v v8, v10
428; CHECK-NEXT:    ret
429  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 2, i32 8, i32 2>
430  ret <8 x i8> %shuff
431}
432
433define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
434; CHECK-LABEL: splat_ve2_we0_ins_i2we4:
435; CHECK:       # %bb.0:
436; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, ta, ma
437; CHECK-NEXT:    vmv.v.i v10, 4
438; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
439; CHECK-NEXT:    vmv.v.i v11, 0
440; CHECK-NEXT:    li a0, 70
441; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
442; CHECK-NEXT:    vslideup.vi v11, v10, 2
443; CHECK-NEXT:    vmv.s.x v0, a0
444; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
445; CHECK-NEXT:    vrgather.vi v10, v8, 2
446; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
447; CHECK-NEXT:    vmv1r.v v8, v10
448; CHECK-NEXT:    ret
449  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 12, i32 2, i32 2, i32 2, i32 8, i32 2>
450  ret <8 x i8> %shuff
451}
452
; With patches into both sources the lowering switches to a vmerge of the
; two inputs followed by one gather with constant-pool indices.
453define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
454; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6:
455; CHECK:       # %bb.0:
456; CHECK-NEXT:    lui a0, %hi(.LCPI26_0)
457; CHECK-NEXT:    addi a0, a0, %lo(.LCPI26_0)
458; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
459; CHECK-NEXT:    vle8.v v10, (a0)
460; CHECK-NEXT:    li a0, 20
461; CHECK-NEXT:    vmv.s.x v0, a0
462; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
463; CHECK-NEXT:    vrgather.vv v8, v9, v10
464; CHECK-NEXT:    ret
465  %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 2, i32 2, i32 14, i32 8, i32 2>
466  ret <8 x i8> %shuff
467}
468
; Splatting one element of a narrower source into a wider result is still
; a single vrgather.vi.
469define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
470; CHECK-LABEL: widen_splat_ve3:
471; CHECK:       # %bb.0:
472; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
473; CHECK-NEXT:    vrgather.vi v9, v8, 3
474; CHECK-NEXT:    vmv1r.v v8, v9
475; CHECK-NEXT:    ret
476  %shuf = shufflevector <4 x i8> %v, <4 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
477  ret <8 x i8> %shuf
478}
479
; Slide and splice patterns: shifted contiguous windows (with undef fill)
; become a single vslideup/vslidedown, and rotations/splices become a
; slidedown+slideup pair.
480define <4 x i16> @slidedown_v4i16(<4 x i16> %x) {
481; CHECK-LABEL: slidedown_v4i16:
482; CHECK:       # %bb.0:
483; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
484; CHECK-NEXT:    vslidedown.vi v8, v8, 1
485; CHECK-NEXT:    ret
486  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
487  ret <4 x i16> %s
488}
489
490define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
491; CHECK-LABEL: slidedown_v8i32:
492; CHECK:       # %bb.0:
493; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
494; CHECK-NEXT:    vslidedown.vi v8, v8, 3
495; CHECK-NEXT:    ret
496  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
497  ret <8 x i32> %s
498}
499
500define <4 x i16> @slideup_v4i16(<4 x i16> %x) {
501; CHECK-LABEL: slideup_v4i16:
502; CHECK:       # %bb.0:
503; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
504; CHECK-NEXT:    vslideup.vi v9, v8, 1
505; CHECK-NEXT:    vmv1r.v v8, v9
506; CHECK-NEXT:    ret
507  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
508  ret <4 x i16> %s
509}
510
511define <8 x i32> @slideup_v8i32(<8 x i32> %x) {
512; CHECK-LABEL: slideup_v8i32:
513; CHECK:       # %bb.0:
514; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
515; CHECK-NEXT:    vslideup.vi v10, v8, 3
516; CHECK-NEXT:    vmv.v.v v8, v10
517; CHECK-NEXT:    ret
518  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
519  ret <8 x i32> %s
520}
521
; Rotation of a single source by 2 lanes: slide the tail down, then slide
; the head back up over it.
522define <8 x i16> @splice_unary(<8 x i16> %x) {
523; CHECK-LABEL: splice_unary:
524; CHECK:       # %bb.0:
525; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
526; CHECK-NEXT:    vslidedown.vi v9, v8, 2
527; CHECK-NEXT:    vslideup.vi v9, v8, 6
528; CHECK-NEXT:    vmv.v.v v8, v9
529; CHECK-NEXT:    ret
530  %s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
531  ret <8 x i16> %s
532}
533
534define <8 x i32> @splice_unary2(<8 x i32> %x) {
535; CHECK-LABEL: splice_unary2:
536; CHECK:       # %bb.0:
537; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
538; CHECK-NEXT:    vslidedown.vi v10, v8, 5
539; CHECK-NEXT:    vslideup.vi v10, v8, 3
540; CHECK-NEXT:    vmv.v.v v8, v10
541; CHECK-NEXT:    ret
542  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
543  ret <8 x i32> %s
544}
545
; Two-source splices: tail of %x followed by head of %y (or %y's aliased
; register), same slide pair but across the pair of inputs.
546define <8 x i16> @splice_binary(<8 x i16> %x, <8 x i16> %y) {
547; CHECK-LABEL: splice_binary:
548; CHECK:       # %bb.0:
549; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
550; CHECK-NEXT:    vslidedown.vi v8, v8, 2
551; CHECK-NEXT:    vslideup.vi v8, v9, 6
552; CHECK-NEXT:    ret
553  %s = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
554  ret <8 x i16> %s
555}
556
557define <8 x i32> @splice_binary2(<8 x i32> %x, <8 x i32> %y) {
558; CHECK-LABEL: splice_binary2:
559; CHECK:       # %bb.0:
560; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
561; CHECK-NEXT:    vslidedown.vi v8, v8, 5
562; CHECK-NEXT:    vslideup.vi v8, v10, 3
563; CHECK-NEXT:    ret
564  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
565  ret <8 x i32> %s
566}
567
; A chain of sub-vector extracts that folds into one slide on the original
; 16-element source.
568define <4 x i16> @shuffle_shuffle_vslidedown(<16 x i16> %0) {
569; CHECK-LABEL: shuffle_shuffle_vslidedown:
570; CHECK:       # %bb.0: # %entry
571; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
572; CHECK-NEXT:    vslidedown.vi v8, v8, 5
573; CHECK-NEXT:    ret
574entry:
575  %1 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
576  %2 = shufflevector <16 x i16> %0, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
577  %3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
578  %4 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
579  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
580  ret <4 x i16> %5
581}
582
; Concatenating the low half of %v with the low half of %w is a single
; vslideup, tolerated undef lanes included.
583define <8 x i8> @concat_4xi8_start(<8 x i8> %v, <8 x i8> %w) {
584; CHECK-LABEL: concat_4xi8_start:
585; CHECK:       # %bb.0:
586; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
587; CHECK-NEXT:    vslideup.vi v8, v9, 4
588; CHECK-NEXT:    ret
589  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
590  ret <8 x i8> %res
591}
592
593define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) {
594; CHECK-LABEL: concat_4xi8_start_undef:
595; CHECK:       # %bb.0:
596; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
597; CHECK-NEXT:    vslideup.vi v8, v9, 4
598; CHECK-NEXT:    ret
599  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 undef, i32 10, i32 11>
600  ret <8 x i8> %res
601}
602
; With lane 4 undef the concat is done at e16 granularity (4 x e16 with a
; slideup of 2).
603define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
604; CHECK-LABEL: concat_4xi8_start_undef_at_start:
605; CHECK:       # %bb.0:
606; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
607; CHECK-NEXT:    vslideup.vi v8, v9, 2
608; CHECK-NEXT:    ret
609  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 9, i32 10, i32 11>
610  ret <8 x i8> %res
611}
612
; Merging a run of one vector into another: depending on where the
; inserted run lands this is a masked slide, a prefix slideup, or a
; tail-undisturbed whole-register move.
613define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
614; CHECK-LABEL: merge_start_into_end_non_contiguous:
615; CHECK:       # %bb.0:
616; CHECK-NEXT:    li a0, 144
617; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
618; CHECK-NEXT:    vmv.s.x v0, a0
619; CHECK-NEXT:    vslideup.vi v8, v9, 4, v0.t
620; CHECK-NEXT:    ret
621  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
622  ret <8 x i8> %res
623}
624
625define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) {
626; CHECK-LABEL: merge_end_into_end:
627; CHECK:       # %bb.0:
628; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
629; CHECK-NEXT:    vmv.v.v v9, v8
630; CHECK-NEXT:    vmv1r.v v8, v9
631; CHECK-NEXT:    ret
632  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
633  ret <8 x i8> %res
634}
635
636define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) {
637; CHECK-LABEL: merge_start_into_middle:
638; CHECK:       # %bb.0:
639; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
640; CHECK-NEXT:    vslideup.vi v8, v9, 1
641; CHECK-NEXT:    ret
642  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
643  ret <8 x i8> %res
644}
645
646define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
647; CHECK-LABEL: merge_start_into_start:
648; CHECK:       # %bb.0:
649; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
650; CHECK-NEXT:    vmv.v.v v8, v9
651; CHECK-NEXT:    ret
652  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
653  ret <8 x i8> %res
654}
655
656define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
657; CHECK-LABEL: merge_slidedown:
658; CHECK:       # %bb.0:
659; CHECK-NEXT:    li a0, 60
660; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
661; CHECK-NEXT:    vmv.s.x v0, a0
662; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
663; CHECK-NEXT:    vmv1r.v v8, v9
664; CHECK-NEXT:    ret
665  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
666  ret <8 x i8> %res
667}
668
669; This should slide %v down by 2 and %w up by 1 before merging them
670define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) {
671; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
672; CHECK:       # %bb.0:
673; CHECK-NEXT:    li a0, -22
674; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
675; CHECK-NEXT:    vmv.s.x v0, a0
676; CHECK-NEXT:    vslidedown.vi v8, v8, 2
677; CHECK-NEXT:    vslideup.vi v8, v9, 1, v0.t
678; CHECK-NEXT:    ret
679  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
680  ret <8 x i8> %res
681}
682
683; This shouldn't generate a vmerge because the elements of %w are not consecutive
684define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
685; CHECK-LABEL: unmergable:
686; CHECK:       # %bb.0:
687; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
688; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
689; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
690; CHECK-NEXT:    vle8.v v10, (a0)
691; CHECK-NEXT:    li a0, 84
692; CHECK-NEXT:    vmv.s.x v0, a0
693; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
694; CHECK-NEXT:    vrgather.vv v8, v9, v10
695; CHECK-NEXT:    ret
696  %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
697  ret <8 x i8> %res
698}
699
700; Make sure we use a vmv.v.i to load the mask constant.
701define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
702; CHECK-LABEL: shuffle_v8i32_2:
703; CHECK:       # %bb.0:
704; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
705; CHECK-NEXT:    vmv.v.i v0, 13
706; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
707; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
708; CHECK-NEXT:    ret
709  %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
710  ret <8 x i32> %s
711}
712
713; FIXME: This could be expressed as a vrgather.vv
; Strided pick of every 8th byte, currently lowered as three successive
; narrowing shifts (vnsrl) instead of a gather.
714define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
715; CHECK-LABEL: shuffle_v64i8_v8i8:
716; CHECK:       # %bb.0:
717; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
718; CHECK-NEXT:    vnsrl.wi v12, v8, 0
719; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
720; CHECK-NEXT:    vnsrl.wi v8, v12, 0
721; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
722; CHECK-NEXT:    vnsrl.wi v8, v8, 0
723; CHECK-NEXT:    ret
724  %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
725  ret <8 x i8> %s
726}
727
; Compress-style shuffles (strictly increasing source indices packed to
; the front) lower to vcompress.vm with a constant mask (181 = 0b10110101
; selects lanes 0,2,4,5,7).
728define <8 x i8> @shuffle_compress_singlesrc_e8(<8 x i8> %v) {
729; CHECK-LABEL: shuffle_compress_singlesrc_e8:
730; CHECK:       # %bb.0:
731; CHECK-NEXT:    li a0, 181
732; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
733; CHECK-NEXT:    vmv.s.x v10, a0
734; CHECK-NEXT:    vcompress.vm v9, v8, v10
735; CHECK-NEXT:    vmv1r.v v8, v9
736; CHECK-NEXT:    ret
737  %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
738  ret <8 x i8> %out
739}
740
741define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) {
742; CHECK-LABEL: shuffle_compress_singlesrc_e16:
743; CHECK:       # %bb.0:
744; CHECK-NEXT:    li a0, 181
745; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
746; CHECK-NEXT:    vmv.s.x v10, a0
747; CHECK-NEXT:    vcompress.vm v9, v8, v10
748; CHECK-NEXT:    vmv.v.v v8, v9
749; CHECK-NEXT:    ret
750  %out = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
751  ret <8 x i16> %out
752}
753
; This e32 pattern keeps adjacent pairs together, so it is recast as an
; e64 compress with mask 13.
754define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) {
755; CHECK-LABEL: shuffle_compress_singlesrc_e32:
756; CHECK:       # %bb.0:
757; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
758; CHECK-NEXT:    vmv.v.i v12, 13
759; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
760; CHECK-NEXT:    vcompress.vm v10, v8, v12
761; CHECK-NEXT:    vmv.v.v v8, v10
762; CHECK-NEXT:    ret
763  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef>
764  ret <8 x i32> %out
765}
766
767define <8 x i64> @shuffle_compress_singlesrc_e64(<8 x i64> %v) {
768; CHECK-LABEL: shuffle_compress_singlesrc_e64:
769; CHECK:       # %bb.0:
770; CHECK-NEXT:    li a0, 181
771; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
772; CHECK-NEXT:    vmv.s.x v16, a0
773; CHECK-NEXT:    vcompress.vm v12, v8, v16
774; CHECK-NEXT:    vmv.v.v v8, v12
775; CHECK-NEXT:    ret
776  %out = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
777  ret <8 x i64> %out
778}
779
; An undef hole in the middle of the packed run keeps this from being a
; pure compress, so it falls back to an indexed gather.
780define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
781; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
782; CHECK:       # %bb.0:
783; CHECK-NEXT:    lui a0, %hi(.LCPI53_0)
784; CHECK-NEXT:    addi a0, a0, %lo(.LCPI53_0)
785; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
786; CHECK-NEXT:    vle16.v v12, (a0)
787; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
788; CHECK-NEXT:    vmv.v.v v8, v10
789; CHECK-NEXT:    ret
790  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
791  ret <8 x i32> %out
792}
793
; Stride-2 spread landing at even positions: recognized as zero-extending
; each e32 element into an e64 lane, i.e. a single vzext.vf2.
define <8 x i32> @shuffle_spread2_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
  ret <8 x i32> %out
}
804
; Stride-2 spread landing at odd positions: vzext.vf2 followed by a 32-bit
; left shift to move each element into the high half of its e64 lane.
define <8 x i32> @shuffle_spread2_singlesrc_e32_index1(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsll.vx v8, v10, a0
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i32> %out
}
816
; Stride-2 spread starting at index 2: the offset is a whole element, so the
; zext/shift trick does not apply and this lowers to a vid-based
; vrgatherei16 with indices (i >> 1) - 1.
define <8 x i32> @shuffle_spread2_singlesrc_e32_index2(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsrl.vi v10, v10, 1
; CHECK-NEXT:    vadd.vi v12, v10, -1
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef>
  ret <8 x i32> %out
}
831
; Stride-3 spread: no power-of-two extension applies, so a small index
; vector is built inline (vmv.v.i + vslide1down) and used for an e64
; vrgatherei16.
define <8 x i32> @shuffle_spread3_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread3_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vslide1down.vx v12, v10, a0
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 2, i32 undef>
  ret <8 x i32> %out
}
846
847; TODO: This should be a single vslideup.vi
; Stride-4 spread of elements 0 and 1; currently lowered as a vid>>2 index
; vector plus vrgatherei16.
define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsrl.vi v12, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
  ret <8 x i32> %out
}
861
; Stride-4 spread of bytes at offset 0: a single vzext.vf4 places each byte
; in the low lane of an e32 element.
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %out
}
872
; Stride-4 spread of bytes at offset 1: vzext.vf4 plus an 8-bit shift to
; land each byte in the second byte lane of its e32 element.
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 8
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
  ret <16 x i8> %out
}
883
; Stride-4 spread of bytes at offset 2: vzext.vf4 plus a 16-bit shift.
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef>
  ret <16 x i8> %out
}
894
; Stride-4 spread of bytes at offset 3: vzext.vf4 plus a 24-bit shift.
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf4 v9, v8
; CHECK-NEXT:    vsll.vi v8, v9, 24
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3>
  ret <16 x i8> %out
}
905
; Stride-4 spread starting at index 4: the offset spans a full e32 element,
; so no extend/shift form exists and a generic vrgather with (i>>2)-1
; indices is emitted.
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vsrl.vi v9, v9, 2
; CHECK-NEXT:    vadd.vi v10, v9, -1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %out
}
919
920
; Stride-8 spread of bytes at offset 0: a single vzext.vf8 into e64 lanes.
define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) {
; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vzext.vf8 v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %out
}
931
; Decompress-style mask (monotone indices with undef gaps); currently
; lowered as a vrgatherei16 with constant-pool indices.
define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI65_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI65_0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 undef, i32 4>
  ret <8 x i32> %out
}
945
946; TODO: This should be a single vslideup.vi
; Decompress at e8: element 0 stays, elements 1-4 move up by 3; currently a
; constant-pool vrgather rather than a single slide.
define <8 x i8> @shuffle_decompress_singlesrc_e8(<8 x i8> %v) {
; CHECK-LABEL: shuffle_decompress_singlesrc_e8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI66_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI66_0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
  ret <8 x i8> %out
}
960
961
; Each element repeated twice (<0,0,1,1,...>): lowered as a self-interleave
; using the widening vwaddu.vv + vwmaccu.vx zip idiom.
define <8 x i32> @shuffle_repeat2_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v8
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ret <8 x i32> %out
}
974
; Each element repeated three times (<0,0,0,1,1,1,2,2>): the e16 index
; vector <0,0,0,1,1,1,2,2> is materialized with two vmerge.vim steps
; (masks 7 = low three lanes, 192 = top two lanes), then gathered at e32.
define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v0, 7
; CHECK-NEXT:    vmv.v.i v11, 1
; CHECK-NEXT:    li a0, 192
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vmerge.vim v11, v11, 0, v0
; CHECK-NEXT:    vmv.v.v v0, v10
; CHECK-NEXT:    vmerge.vim v12, v11, 2, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2>
  ret <8 x i32> %out
}
993
; Each element repeated four times: index vector is simply vid >> 2,
; followed by a vrgatherei16 at e32.
define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) {
; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsrl.vi v12, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %out
}
1007
; Zip of even lanes from both sources: lowered as a masked vslideup.vi 1 of
; %v2 into %v1, mask 170 (0b10101010) selecting the odd output lanes.
define <8 x i32> @shuffle_zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: shuffle_zipeven_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 170
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslideup.vi v8, v10, 1, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i32> %out
}
1019
; Zip of odd lanes from both sources: masked vslidedown.vi 1 of %v1 into
; %v2, mask 85 (0b01010101) selecting the even output lanes.
define <8 x i32> @shuffle_zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: shuffle_zipodd_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i32> %out
}
1032
; Zipeven at 16 x e64 (m8): same masked-slideup lowering, with the
; 16-bit alternating mask 0xAAAA built via lui/addi.
define <16 x i64> @shuffle_zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; CHECK-LABEL: shuffle_zipeven_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 11
; CHECK-NEXT:    addi a0, a0, -1366
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslideup.vi v8, v16, 1, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i64> %out
}
1045
; Zipodd at 16 x e64 (m8): masked slidedown with the complementary
; alternating mask 0x5555.
define <16 x i64> @shuffle_zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; CHECK-LABEL: shuffle_zipodd_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vslidedown.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <16 x i64> %out
}
1059
; Two-source shuffle where each output lane reads a disjoint source: the
; sources are combined with one vmerge (mask 0xAAAA) and then permuted with
; a single vrgatherei16 using sign-extended e8 constant-pool indices.
define <16 x i32> @shuffle_disjoint_lanes(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI74_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI74_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, 11
; CHECK-NEXT:    addi a0, a0, -1366
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v18, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v18
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}
1079
; Disjoint-lane shuffle where %v's contribution is the identity (lanes 0-3
; and 8 keep their position): only the %w lanes need a gather, done with a
; single masked vrgatherei16 into v8.
define <16 x i32> @shuffle_disjoint_lanes_one_identity(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI75_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI75_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    li a0, -272
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 26, i32 30, i32 22, i32 20, i32 8, i32 31, i32 29, i32 28, i32 27, i32 23, i32 25, i32 22>
  ret <16 x i32> %out
}
1094
; Disjoint-lane shuffle where %v contributes only element 7: a vrgather.vi 7
; broadcast builds the base, then a masked vrgatherei16 fills in the %w
; lanes.
define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI76_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI76_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v20, (a0)
; CHECK-NEXT:    lui a0, 15
; CHECK-NEXT:    addi a0, a0, 240
; CHECK-NEXT:    vmv.s.x v0, a0
; CHECK-NEXT:    vrgather.vi v16, v8, 7
; CHECK-NEXT:    vrgatherei16.vv v16, v12, v20, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 26, i32 30, i32 22, i32 18, i32 7, i32 7, i32 7, i32 7, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}
1112
; Disjoint-lane shuffle where one source is a scalar splat: the splat is
; materialized directly with vmv.v.x and the %w lanes are filled by a
; masked vrgatherei16 — no gather is needed for the splat side.
define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
; CHECK-LABEL: shuffle_disjoint_lanes_one_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI77_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI77_0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    lui a1, 15
; CHECK-NEXT:    addi a1, a1, 240
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %head = insertelement <16 x i32> poison, i32 %v, i32 0
  %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer
  %out = shufflevector <16 x i32> %splat, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
  ret <16 x i32> %out
}
1132
; Shuffle of illegal-element-type <4 x i128> passed/returned indirectly:
; elements are spilled piecewise to an aligned stack slot (32-bit lw/sw on
; RV32, 64-bit ld/sd on RV64), reloaded as a legal vector, permuted with a
; constant-pool vrgatherei16, and stored to the sret pointer.
define <4 x i128> @shuffle_i128(<4 x i128> %a) {
; RV32-LABEL: shuffle_i128:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    lw a2, 60(a1)
; RV32-NEXT:    sw a2, 60(sp)
; RV32-NEXT:    lw a2, 56(a1)
; RV32-NEXT:    sw a2, 56(sp)
; RV32-NEXT:    lw a2, 52(a1)
; RV32-NEXT:    sw a2, 52(sp)
; RV32-NEXT:    lw a2, 48(a1)
; RV32-NEXT:    sw a2, 48(sp)
; RV32-NEXT:    lw a2, 44(a1)
; RV32-NEXT:    sw a2, 44(sp)
; RV32-NEXT:    lw a2, 40(a1)
; RV32-NEXT:    sw a2, 40(sp)
; RV32-NEXT:    lw a2, 36(a1)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lw a2, 32(a1)
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    lw a2, 12(a1)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(sp)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    mv a2, sp
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    lui a1, %hi(.LCPI78_0)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI78_0)
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vle32.v v8, (a2)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle16.v v12, (a1)
; RV32-NEXT:    vrgatherei16.vv v16, v8, v12
; RV32-NEXT:    vse64.v v16, (a0)
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    .cfi_def_cfa sp, 128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i128:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    ld a2, 56(a1)
; RV64-NEXT:    sd a2, 56(sp)
; RV64-NEXT:    ld a2, 48(a1)
; RV64-NEXT:    sd a2, 48(sp)
; RV64-NEXT:    ld a2, 40(a1)
; RV64-NEXT:    sd a2, 40(sp)
; RV64-NEXT:    ld a2, 32(a1)
; RV64-NEXT:    sd a2, 32(sp)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(sp)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    sd a1, 0(sp)
; RV64-NEXT:    lui a1, %hi(.LCPI78_0)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI78_0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vle64.v v8, (a2)
; RV64-NEXT:    vle16.v v12, (a1)
; RV64-NEXT:    vrgatherei16.vv v16, v8, v12
; RV64-NEXT:    vse64.v v16, (a0)
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    .cfi_def_cfa sp, 128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  ret <4 x i128> %res
}
1231
; Same <4 x i128> shuffle but through a load/store: no stack scalarization
; is needed — the value is loaded as 8 x e64, permuted with constant-pool
; indices, and stored back.
define void @shuffle_i128_ldst(ptr %p) {
; CHECK-LABEL: shuffle_i128_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI79_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI79_0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v12
; CHECK-NEXT:    vse64.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i128> %res, ptr %p
  ret void
}
1248
; <4 x i256> shuffle through memory: handled as 16 x e64 with a
; sign-extended e8 constant-pool index vector feeding vrgatherei16.
define void @shuffle_i256_ldst(ptr %p) {
; CHECK-LABEL: shuffle_i256_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a1, %hi(.LCPI80_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI80_0)
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v24, v16, v10
; CHECK-NEXT:    vse64.v v24, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i256> %res, ptr %p
  ret void
}
1267
; Splat of lane 0 of a <4 x i64> in memory: RV32 uses a zero-stride vlse64
; to broadcast straight from memory; RV64 loads the scalar and broadcasts
; with vmv.v.x.
define void @shuffle_i64_splat(ptr %p) nounwind {
; RV32-LABEL: shuffle_i64_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i64_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %p
  %res = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i64> %res, ptr %p
  ret void
}
1288
; Splat of the first i128 element: the e64 gather indices are materialized
; from an immediate (lui 16) instead of a constant pool, then applied with
; vrgatherei16.
define void @shuffle_i128_splat(ptr %p) nounwind {
; CHECK-LABEL: shuffle_i128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v8, v12
; CHECK-NEXT:    vse64.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i128> %res, ptr %p
  ret void
}
1306
; Splat of the first i256 element: index materialization differs per target
; — RV32 builds an e32 immediate splat and sign-extends to e16 indices,
; RV64 assembles a 64-bit immediate (lui/slli/addi/slli) and splats it at
; e64 — before the final vrgatherei16 over 16 x e64.
define void @shuffle_i256_splat(ptr %p) nounwind {
; RV32-LABEL: shuffle_i256_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 12320
; RV32-NEXT:    addi a1, a1, 256
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v18, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v24, v8, v18
; RV32-NEXT:    vse64.v v24, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: shuffle_i256_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 98305
; RV64-NEXT:    slli a1, a1, 5
; RV64-NEXT:    addi a1, a1, 1
; RV64-NEXT:    slli a1, a1, 16
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.v.x v16, a1
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v24, v8, v16
; RV64-NEXT:    vse64.v v24, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  store <4 x i256> %res, ptr %p
  ret void
}
1342
1343