; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

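; These tests rely on an exactly known VLEN: with +v, vscale_range(2,2)
; implies VLEN=128, so each LMUL=1 register holds exactly 2 x i64 or
; 4 x i32. Knowing where the register boundaries fall lets the backend
; decompose LMUL>1 shuffles into whole-register LMUL=1 operations.

; A splat of element 0 across the full vector remains a single m2 vrgather.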
define <4 x i64> @m2_splat_0(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %res
}

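; The mask splats within each 128-bit chunk, so this decomposes into one
; LMUL=1 vrgather per source register.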
define <4 x i64> @m2_splat_in_chunks(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_in_chunks:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vrgather.vi v11, v9, 0
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x i64> %res
}

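; The same per-chunk decomposition at LMUL=4: four independent m1 vrgathers.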
define <8 x i64> @m4_splat_in_chunks(<8 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m4_splat_in_chunks:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vrgather.vi v13, v9, 0
; CHECK-NEXT:    vrgather.vi v14, v10, 0
; CHECK-NEXT:    vrgather.vi v15, v11, 1
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %v1, <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 7, i32 7>
  ret <8 x i64> %res
}

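; Splat in the low chunk, identity in the high chunk: an m1 vrgather plus a
; whole-register move.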
define <4 x i64> @m2_splat_with_tail(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_with_tail:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x i64> %res
}

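; Swapping adjacent i64 pairs lowers to a slidedown/slideup pair per
; register.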
define <4 x i64> @m2_pair_swap_vl4(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_pair_swap_vl4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v11, v9, 1
; CHECK-NEXT:    vslideup.vi v11, v9, 1
; CHECK-NEXT:    vslidedown.vi v10, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i64> %res
}

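; Swapping adjacent i32 pairs is matched as a rotate by 32 within i64
; elements; RV64 can shift by a scalar directly, while RV32 must first
; materialize the 64-bit shift amounts.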
define <8 x i32> @m2_pair_swap_vl8(<8 x i32> %v1) vscale_range(2,2) {
; RV32-LABEL: m2_pair_swap_vl8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vwsubu.vx v12, v10, a0
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vsrl.vv v12, v8, v12
; RV32-NEXT:    vsll.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: m2_pair_swap_vl8:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
  %res = shufflevector <8 x i32> %v1, <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x i32> %res
}

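; Identical source shuffle (and lowering) to m2_splat_with_tail above.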
define <4 x i64> @m2_splat_into_identity(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x i64> %res
}

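; <0,1,0,1> repeats the low 128 bits, i.e. broadcasts a whole register, so a
; single vmv1r suffices.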
define <4 x i64> @m2_broadcast_i128(<4 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m2_broadcast_i128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %res
}

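; The LMUL=4 form of the same i128 broadcast: three register copies.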
define <8 x i64> @m4_broadcast_i128(<8 x i64> %v1) vscale_range(2,2) {
; CHECK-LABEL: m4_broadcast_i128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vmv1r.v v10, v8
; CHECK-NEXT:    vmv1r.v v11, v8
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i64> %v1, <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i64> %res
}

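; Two-source shuffle where each result chunk is a splat drawn from a single
; source register.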
define <4 x i64> @m2_splat_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_two_source:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vrgather.vi v13, v11, 1
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
  ret <4 x i64> %res
}

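; The result's high chunk is v2's high register, which already sits in v11
; where the destination group wants it, so only the splat needs computing.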
define <4 x i64> @m2_splat_into_identity_two_source_v2_hi(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_identity_two_source_v2_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v10, v8, 0
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
  ret <4 x i64> %res
}

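; Here the result's high chunk is v2's low register; one vmv1r moves it into
; place.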
define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_slide_two_source_v2_lo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vmv1r.v v13, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
  ret <4 x i64> %res
}

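; Elements 5 and 6 straddle v2's register boundary, so the high chunk needs
; a slidedown/slideup pair rather than a plain copy.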
define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
; CHECK-LABEL: m2_splat_into_slide_two_source:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v13, v10, 1
; CHECK-NEXT:    vslideup.vi v13, v11, 1
; CHECK-NEXT:    vrgather.vi v12, v8, 0
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
  ret <4 x i64> %res
}

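; A chain of shuffles seeded by a <3 x i32> load; exercises odd fixed-length
; vectors feeding a masked vrgather.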
define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
; CHECK-LABEL: shuffle1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 252
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v11, (a0)
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vsrl.vi v10, v10, 1
; CHECK-NEXT:    vadd.vi v10, v10, 1
; CHECK-NEXT:    vrgather.vv v9, v11, v10, v0.t
; CHECK-NEXT:    addi a0, a1, 672
; CHECK-NEXT:    vs2r.v v8, (a0)
; CHECK-NEXT:    ret
  %1 = getelementptr i32, ptr %explicit_0, i64 63
  %2 = load <3 x i32>, ptr %1, align 1
  %3 = shufflevector <3 x i32> %2, <3 x i32> undef, <2 x i32> <i32 1, i32 2>
  %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 9, i32 7>
  %6 = getelementptr inbounds <8 x i32>, ptr %explicit_1, i64 21
  store <8 x i32> %5, ptr %6, align 32
  ret void
}

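; Scalar extract/insert between vectors, recognized and lowered as a masked
; vrgather from the source register.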
define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
; CHECK-LABEL: shuffle2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv1r.v v12, v8
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vid.v v13
; CHECK-NEXT:    vadd.vv v13, v13, v13
; CHECK-NEXT:    vmv.v.i v0, 6
; CHECK-NEXT:    vrsub.vi v13, v13, 4
; CHECK-NEXT:    vrgather.vv v9, v12, v13, v0.t
; CHECK-NEXT:    ret
  %b = extractelement <4 x float> %a, i32 2
  %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
  %b1 = extractelement <4 x float> %a, i32 0
  %c1 = insertelement <16 x float> %c, float %b1, i32 6
  ret <16 x float>%c1
}

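; Variable-index extract: RV32 slides the element down within the vector
; unit, while RV64 goes through the stack (and, since M is not enabled,
; scales the index with a __muldi3 libcall).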
define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV32-NEXT:    vmv.v.i v0, 1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vrgather.vi v18, v15, 1, v0.t
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vx v8, v16, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extract_any_extend_vector_inreg_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s2, -24
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT:    vmv.v.i v0, 1
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vmv.v.i v16, 0
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT:    vrgather.vi v18, v15, 1, v0.t
; RV64-NEXT:    mv s2, sp
; RV64-NEXT:    vs8r.v v16, (s2)
; RV64-NEXT:    andi a0, a0, 15
; RV64-NEXT:    li a1, 8
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    add a0, s2, a0
; RV64-NEXT:    ld a0, 0(a0)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    .cfi_def_cfa sp, 256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    .cfi_restore s2
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %1 = extractelement <16 x i64> %a0, i32 15
  %2 = insertelement <16 x i64> zeroinitializer, i64 %1, i32 4
  %3 = extractelement <16 x i64> %2, i32 %a1
  ret i64 %3
}

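; An even/odd deinterleave of the two sources feeding an fadd.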
define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
; CHECK-LABEL: shuffles_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vmv1r.v v13, v10
; CHECK-NEXT:    vslideup.vi v13, v11, 1
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    vmv.v.i v0, 1
; CHECK-NEXT:    vrgather.vi v12, v9, 0
; CHECK-NEXT:    vmv1r.v v9, v11
; CHECK-NEXT:    vrgather.vi v9, v10, 1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v12, v8
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %5 = fadd <4 x double> %3, %4
  ret <4 x double> %5
}

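; A transpose-like mask where every destination chunk needs elements from
; every source chunk; splitting would take a quadratic number of chunk
; shuffles, so a single m4 vrgatherei16 with a constant index vector is
; preferred.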
define <16 x i32> @m4_square_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_range(2,2) {
; CHECK-LABEL: m4_square_num_of_shuffles_in_chunks:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI17_0)
; CHECK-NEXT:    vl1r.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vsext.vf2 v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <16 x i32> <i32 0, i32 5, i32 8, i32 12, i32 1, i32 4, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i32> %1
}

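; Each live destination chunk reads from at most two source chunks, so
; per-chunk vrgathers win here; chunk 1 is entirely poison and costs
; nothing.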
define <16 x i32> @m4_linear_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_range(2,2) {
; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 8
; CHECK-NEXT:    vrgather.vi v12, v10, 0
; CHECK-NEXT:    vrgather.vi v12, v11, 0, v0.t
; CHECK-NEXT:    vrgather.vi v14, v8, 2
; CHECK-NEXT:    vrgather.vi v15, v10, 3
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
entry:
  %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison>
  ret <16 x i32> %1
}

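; vscale_range(8,8) pins VLEN=512, so the <32 x i32> operand is an m2 group;
; the pair swaps against zero fold into a rotate by 32 plus a merge with a
; zero vector.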
define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) {
; RV32-LABEL: multi_chunks_shuffle:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vwsubu.vx v12, v10, a0
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vsrl.vv v12, v8, v12
; RV32-NEXT:    vsll.vv v8, v8, v10
; RV32-NEXT:    vmv.s.x v0, a0
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
; RV32-NEXT:    vrgather.vi v10, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    srai a1, a0, 31
; RV32-NEXT:    ret
;
; RV64-LABEL: multi_chunks_shuffle:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 16, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addi a0, a0, -241
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vmv.s.x v0, a0
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vmv.v.i v10, 0
; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
; RV64-NEXT:    vrgather.vi v10, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
entry:
  %1 = shufflevector <32 x i32> %0, <32 x i32> zeroinitializer, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 37, i32 36, i32 39, i32 38, i32 9, i32 8, i32 11, i32 10, i32 45, i32 44, i32 47, i32 46, i32 17, i32 16, i32 19, i32 18, i32 53, i32 52, i32 55, i32 54, i32 25, i32 24, i32 27, i32 26, i32 61, i32 60, i32 63, i32 62>
  %2 = shufflevector <32 x i32> zeroinitializer, <32 x i32> %1, <32 x i32> <i32 3, i32 34, i32 33, i32 0, i32 7, i32 38, i32 37, i32 4, i32 11, i32 42, i32 41, i32 8, i32 15, i32 46, i32 45, i32 12, i32 19, i32 50, i32 49, i32 16, i32 23, i32 54, i32 53, i32 20, i32 27, i32 58, i32 57, i32 24, i32 31, i32 62, i32 61, i32 28>
  %3 = or <32 x i32> %1, %2
  %4 = extractelement <32 x i32> %3, i64 1
  %conv199 = sext i32 %4 to i64
  ret i64 %conv199
}

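; i128 elements fall on 128-bit register boundaries, so the shuffle is pure
; whole-register moves between the load and the store.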
define void @shuffle_i128_ldst(ptr %p) vscale_range(2,2) {
; CHECK-LABEL: shuffle_i128_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl4re64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vmv4r.v v12, v8
; CHECK-NEXT:    vmv1r.v v14, v11
; CHECK-NEXT:    vmv1r.v v15, v10
; CHECK-NEXT:    vs4r.v v12, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i128>, ptr %p
  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i128> %res, ptr %p
  ret void
}

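; Likewise for i256 elements, each spanning a pair of registers.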
define void @shuffle_i256_ldst(ptr %p) vscale_range(2,2) {
; CHECK-LABEL: shuffle_i256_ldst:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v8
; CHECK-NEXT:    vmv1r.v v11, v9
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    vmv1r.v v20, v14
; CHECK-NEXT:    vmv1r.v v21, v15
; CHECK-NEXT:    vmv1r.v v22, v12
; CHECK-NEXT:    vmv1r.v v23, v13
; CHECK-NEXT:    vs8r.v v16, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i256>, ptr %p
  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
  store <4 x i256> %res, ptr %p
  ret void
}