xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll (revision f947d5afd951fe0883e8afe2d00c00d6a97e29bd)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b \
3; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
4; RUN:   | FileCheck %s --check-prefixes=CHECK,V
5; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \
6; RUN:   -lower-interleaved-accesses=false -verify-machineinstrs \
7; RUN:   | FileCheck %s --check-prefixes=CHECK,ZVE32F
8
; Even-element deinterleave of a <16 x i8> load: the shuffle mask
; <0,2,...,14> keeps every other byte starting at index 0 and the
; <8 x i8> result is stored to %out. Both llc configurations share the
; same expected output: a single vnsrl.wi with shift amount 0.
9define void @vnsrl_0_i8(ptr %in, ptr %out) {
10; CHECK-LABEL: vnsrl_0_i8:
11; CHECK:       # %bb.0: # %entry
12; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
13; CHECK-NEXT:    vle8.v v8, (a0)
14; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
15; CHECK-NEXT:    vnsrl.wi v8, v8, 0
16; CHECK-NEXT:    vse8.v v8, (a1)
17; CHECK-NEXT:    ret
18entry:
19  %0 = load <16 x i8>, ptr %in, align 1
20  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
21  store <8 x i8> %shuffle.i5, ptr %out, align 1
22  ret void
23}
24
; Odd-element deinterleave of a <16 x i8> load: the shuffle mask
; <1,3,...,15> keeps every other byte starting at index 1. Both llc
; configurations expect a single vnsrl.wi with shift amount 8 (one i8
; element width).
25define void @vnsrl_8_i8(ptr %in, ptr %out) {
26; CHECK-LABEL: vnsrl_8_i8:
27; CHECK:       # %bb.0: # %entry
28; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
29; CHECK-NEXT:    vle8.v v8, (a0)
30; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
31; CHECK-NEXT:    vnsrl.wi v8, v8, 8
32; CHECK-NEXT:    vse8.v v8, (a1)
33; CHECK-NEXT:    ret
34entry:
35  %0 = load <16 x i8>, ptr %in, align 1
36  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
37  store <8 x i8> %shuffle.i5, ptr %out, align 1
38  ret void
39}
40
; Even-element deinterleave of a <8 x i16> load (mask <0,2,4,6>) stored as
; <4 x i16>. Both llc configurations expect vnsrl.wi with shift amount 0;
; the expected outputs differ only in the LMUL of the second vsetivli
; (mf4 for the +v configuration, mf2 for the zve32f configuration).
41define void @vnsrl_0_i16(ptr %in, ptr %out) {
42; V-LABEL: vnsrl_0_i16:
43; V:       # %bb.0: # %entry
44; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
45; V-NEXT:    vle16.v v8, (a0)
46; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
47; V-NEXT:    vnsrl.wi v8, v8, 0
48; V-NEXT:    vse16.v v8, (a1)
49; V-NEXT:    ret
50;
51; ZVE32F-LABEL: vnsrl_0_i16:
52; ZVE32F:       # %bb.0: # %entry
53; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
54; ZVE32F-NEXT:    vle16.v v8, (a0)
55; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
56; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
57; ZVE32F-NEXT:    vse16.v v8, (a1)
58; ZVE32F-NEXT:    ret
59entry:
60  %0 = load <8 x i16>, ptr %in, align 2
61  %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
62  store <4 x i16> %shuffle.i5, ptr %out, align 2
63  ret void
64}
65
; Odd-element deinterleave of a <8 x i16> load (mask <1,3,5,7>) stored as
; <4 x i16>. Both llc configurations expect vnsrl.wi with shift amount 16
; (one i16 element width); they differ only in the LMUL of the second
; vsetivli (mf4 for +v, mf2 for zve32f).
66define void @vnsrl_16_i16(ptr %in, ptr %out) {
67; V-LABEL: vnsrl_16_i16:
68; V:       # %bb.0: # %entry
69; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
70; V-NEXT:    vle16.v v8, (a0)
71; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
72; V-NEXT:    vnsrl.wi v8, v8, 16
73; V-NEXT:    vse16.v v8, (a1)
74; V-NEXT:    ret
75;
76; ZVE32F-LABEL: vnsrl_16_i16:
77; ZVE32F:       # %bb.0: # %entry
78; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
79; ZVE32F-NEXT:    vle16.v v8, (a0)
80; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
81; ZVE32F-NEXT:    vnsrl.wi v8, v8, 16
82; ZVE32F-NEXT:    vse16.v v8, (a1)
83; ZVE32F-NEXT:    ret
84entry:
85  %0 = load <8 x i16>, ptr %in, align 2
86  %shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
87  store <4 x i16> %shuffle.i5, ptr %out, align 2
88  ret void
89}
90
; Floating-point variant of the even-element deinterleave: <8 x half> with
; mask <0,2,4,6> stored as <4 x half>. The expected output still uses the
; integer narrowing shift vnsrl.wi with shift amount 0 in both llc
; configurations (mf4 for +v, mf2 for zve32f on the second vsetivli).
91define void @vnsrl_0_half(ptr %in, ptr %out) {
92; V-LABEL: vnsrl_0_half:
93; V:       # %bb.0: # %entry
94; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
95; V-NEXT:    vle16.v v8, (a0)
96; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
97; V-NEXT:    vnsrl.wi v8, v8, 0
98; V-NEXT:    vse16.v v8, (a1)
99; V-NEXT:    ret
100;
101; ZVE32F-LABEL: vnsrl_0_half:
102; ZVE32F:       # %bb.0: # %entry
103; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
104; ZVE32F-NEXT:    vle16.v v8, (a0)
105; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
106; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
107; ZVE32F-NEXT:    vse16.v v8, (a1)
108; ZVE32F-NEXT:    ret
109entry:
110  %0 = load <8 x half>, ptr %in, align 2
111  %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
112  store <4 x half> %shuffle.i5, ptr %out, align 2
113  ret void
114}
115
; Floating-point variant of the odd-element deinterleave: <8 x half> with
; mask <1,3,5,7> stored as <4 x half>. Both llc configurations expect the
; integer narrowing shift vnsrl.wi with shift amount 16 (mf4 for +v, mf2
; for zve32f on the second vsetivli).
116define void @vnsrl_16_half(ptr %in, ptr %out) {
117; V-LABEL: vnsrl_16_half:
118; V:       # %bb.0: # %entry
119; V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
120; V-NEXT:    vle16.v v8, (a0)
121; V-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
122; V-NEXT:    vnsrl.wi v8, v8, 16
123; V-NEXT:    vse16.v v8, (a1)
124; V-NEXT:    ret
125;
126; ZVE32F-LABEL: vnsrl_16_half:
127; ZVE32F:       # %bb.0: # %entry
128; ZVE32F-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
129; ZVE32F-NEXT:    vle16.v v8, (a0)
130; ZVE32F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
131; ZVE32F-NEXT:    vnsrl.wi v8, v8, 16
132; ZVE32F-NEXT:    vse16.v v8, (a1)
133; ZVE32F-NEXT:    ret
134entry:
135  %0 = load <8 x half>, ptr %in, align 2
136  %shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
137  store <4 x half> %shuffle.i5, ptr %out, align 2
138  ret void
139}
140
; Even-element deinterleave of a <4 x i32> load (mask <0,2>) stored as
; <2 x i32>. The +v configuration expects vnsrl.wi with shift amount 0.
; The zve32f configuration instead expects a vslidedown.vi by 2 followed
; by a vslideup.vi by 1.
; NOTE(review): presumably zve32f cannot use vnsrl here because it has no
; 64-bit elements to treat each i32 pair as -- confirm.
141define void @vnsrl_0_i32(ptr %in, ptr %out) {
142; V-LABEL: vnsrl_0_i32:
143; V:       # %bb.0: # %entry
144; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
145; V-NEXT:    vle32.v v8, (a0)
146; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
147; V-NEXT:    vnsrl.wi v8, v8, 0
148; V-NEXT:    vse32.v v8, (a1)
149; V-NEXT:    ret
150;
151; ZVE32F-LABEL: vnsrl_0_i32:
152; ZVE32F:       # %bb.0: # %entry
153; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
154; ZVE32F-NEXT:    vle32.v v8, (a0)
155; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
156; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
157; ZVE32F-NEXT:    vslideup.vi v8, v9, 1
158; ZVE32F-NEXT:    vse32.v v8, (a1)
159; ZVE32F-NEXT:    ret
160entry:
161  %0 = load <4 x i32>, ptr %in, align 4
162  %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
163  store <2 x i32> %shuffle.i5, ptr %out, align 4
164  ret void
165}
166
; Odd-element deinterleave of a <4 x i32> load (mask <1,3>) stored as
; <2 x i32>. The +v configuration expects the register form vnsrl.wx with
; the shift amount 32 materialized in a0 by li. The zve32f configuration
; instead expects a vslidedown.vi by 2 merged with a masked vrgather.vi
; (mask register v0 set by vmv.v.i v0, 1).
; NOTE(review): the .wx form is presumably needed because 32 does not fit
; the immediate field of vnsrl.wi -- confirm against the V spec.
167define void @vnsrl_32_i32(ptr %in, ptr %out) {
168; V-LABEL: vnsrl_32_i32:
169; V:       # %bb.0: # %entry
170; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
171; V-NEXT:    vle32.v v8, (a0)
172; V-NEXT:    li a0, 32
173; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
174; V-NEXT:    vnsrl.wx v8, v8, a0
175; V-NEXT:    vse32.v v8, (a1)
176; V-NEXT:    ret
177;
178; ZVE32F-LABEL: vnsrl_32_i32:
179; ZVE32F:       # %bb.0: # %entry
180; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
181; ZVE32F-NEXT:    vle32.v v8, (a0)
182; ZVE32F-NEXT:    vmv.v.i v0, 1
183; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
184; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
185; ZVE32F-NEXT:    vrgather.vi v9, v8, 1, v0.t
186; ZVE32F-NEXT:    vse32.v v9, (a1)
187; ZVE32F-NEXT:    ret
188entry:
189  %0 = load <4 x i32>, ptr %in, align 4
190  %shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
191  store <2 x i32> %shuffle.i5, ptr %out, align 4
192  ret void
193}
194
; Floating-point variant of the even-element deinterleave: <4 x float>
; with mask <0,2> stored as <2 x float>. The +v configuration expects the
; integer narrowing shift vnsrl.wi with shift amount 0; the zve32f
; configuration expects a vslidedown.vi by 2 plus a vslideup.vi by 1.
195define void @vnsrl_0_float(ptr %in, ptr %out) {
196; V-LABEL: vnsrl_0_float:
197; V:       # %bb.0: # %entry
198; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
199; V-NEXT:    vle32.v v8, (a0)
200; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
201; V-NEXT:    vnsrl.wi v8, v8, 0
202; V-NEXT:    vse32.v v8, (a1)
203; V-NEXT:    ret
204;
205; ZVE32F-LABEL: vnsrl_0_float:
206; ZVE32F:       # %bb.0: # %entry
207; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
208; ZVE32F-NEXT:    vle32.v v8, (a0)
209; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
210; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
211; ZVE32F-NEXT:    vslideup.vi v8, v9, 1
212; ZVE32F-NEXT:    vse32.v v8, (a1)
213; ZVE32F-NEXT:    ret
214entry:
215  %0 = load <4 x float>, ptr %in, align 4
216  %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
217  store <2 x float> %shuffle.i5, ptr %out, align 4
218  ret void
219}
220
; Floating-point variant of the odd-element deinterleave: <4 x float> with
; mask <1,3> stored as <2 x float>. The +v configuration expects vnsrl.wx
; with the shift amount 32 loaded into a0 by li; the zve32f configuration
; expects a vslidedown.vi by 2 merged with a masked vrgather.vi.
221define void @vnsrl_32_float(ptr %in, ptr %out) {
222; V-LABEL: vnsrl_32_float:
223; V:       # %bb.0: # %entry
224; V-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
225; V-NEXT:    vle32.v v8, (a0)
226; V-NEXT:    li a0, 32
227; V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
228; V-NEXT:    vnsrl.wx v8, v8, a0
229; V-NEXT:    vse32.v v8, (a1)
230; V-NEXT:    ret
231;
232; ZVE32F-LABEL: vnsrl_32_float:
233; ZVE32F:       # %bb.0: # %entry
234; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
235; ZVE32F-NEXT:    vle32.v v8, (a0)
236; ZVE32F-NEXT:    vmv.v.i v0, 1
237; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
238; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
239; ZVE32F-NEXT:    vrgather.vi v9, v8, 1, v0.t
240; ZVE32F-NEXT:    vse32.v v9, (a1)
241; ZVE32F-NEXT:    ret
242entry:
243  %0 = load <4 x float>, ptr %in, align 4
244  %shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
245  store <2 x float> %shuffle.i5, ptr %out, align 4
246  ret void
247}
248
; Even-element deinterleave of a <4 x i64> load (mask <0,2>) stored as
; <2 x i64>. No vnsrl is expected in either llc configuration: +v expects
; a vslidedown.vi by 2 plus a vslideup.vi by 1, while zve32f expects
; scalar code that loads the doublewords at byte offsets 0 and 16 and
; stores them at offsets 0 and 8.
249define void @vnsrl_0_i64(ptr %in, ptr %out) {
250; V-LABEL: vnsrl_0_i64:
251; V:       # %bb.0: # %entry
252; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
253; V-NEXT:    vle64.v v8, (a0)
254; V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
255; V-NEXT:    vslidedown.vi v9, v8, 2
256; V-NEXT:    vslideup.vi v8, v9, 1
257; V-NEXT:    vse64.v v8, (a1)
258; V-NEXT:    ret
259;
260; ZVE32F-LABEL: vnsrl_0_i64:
261; ZVE32F:       # %bb.0: # %entry
262; ZVE32F-NEXT:    ld a2, 0(a0)
263; ZVE32F-NEXT:    ld a0, 16(a0)
264; ZVE32F-NEXT:    sd a2, 0(a1)
265; ZVE32F-NEXT:    sd a0, 8(a1)
266; ZVE32F-NEXT:    ret
267entry:
268  %0 = load <4 x i64>, ptr %in, align 8
269  %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
270  store <2 x i64> %shuffle.i5, ptr %out, align 8
271  ret void
272}
273
; Odd-element deinterleave of a <4 x i64> load (mask <1,3>) stored as
; <2 x i64>. No vnsrl is expected in either llc configuration: +v expects
; a vslidedown.vi by 2 merged with a masked vrgather.vi, while zve32f
; expects scalar code that loads the doublewords at byte offsets 8 and 24
; and stores them at offsets 0 and 8.
274define void @vnsrl_64_i64(ptr %in, ptr %out) {
275; V-LABEL: vnsrl_64_i64:
276; V:       # %bb.0: # %entry
277; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
278; V-NEXT:    vle64.v v8, (a0)
279; V-NEXT:    vmv.v.i v0, 1
280; V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
281; V-NEXT:    vslidedown.vi v9, v8, 2
282; V-NEXT:    vrgather.vi v9, v8, 1, v0.t
283; V-NEXT:    vse64.v v9, (a1)
284; V-NEXT:    ret
285;
286; ZVE32F-LABEL: vnsrl_64_i64:
287; ZVE32F:       # %bb.0: # %entry
288; ZVE32F-NEXT:    ld a2, 8(a0)
289; ZVE32F-NEXT:    ld a0, 24(a0)
290; ZVE32F-NEXT:    sd a2, 0(a1)
291; ZVE32F-NEXT:    sd a0, 8(a1)
292; ZVE32F-NEXT:    ret
293entry:
294  %0 = load <4 x i64>, ptr %in, align 8
295  %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
296  store <2 x i64> %shuffle.i5, ptr %out, align 8
297  ret void
298}
299
; Floating-point variant of the 64-bit even-element deinterleave:
; <4 x double> with mask <0,2> stored as <2 x double>. +v expects a
; vslidedown.vi by 2 plus a vslideup.vi by 1; zve32f expects scalar ld/sd
; of the doublewords at byte offsets 0 and 16.
300define void @vnsrl_0_double(ptr %in, ptr %out) {
301; V-LABEL: vnsrl_0_double:
302; V:       # %bb.0: # %entry
303; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
304; V-NEXT:    vle64.v v8, (a0)
305; V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
306; V-NEXT:    vslidedown.vi v9, v8, 2
307; V-NEXT:    vslideup.vi v8, v9, 1
308; V-NEXT:    vse64.v v8, (a1)
309; V-NEXT:    ret
310;
311; ZVE32F-LABEL: vnsrl_0_double:
312; ZVE32F:       # %bb.0: # %entry
313; ZVE32F-NEXT:    ld a2, 0(a0)
314; ZVE32F-NEXT:    ld a0, 16(a0)
315; ZVE32F-NEXT:    sd a2, 0(a1)
316; ZVE32F-NEXT:    sd a0, 8(a1)
317; ZVE32F-NEXT:    ret
318entry:
319  %0 = load <4 x double>, ptr %in, align 8
320  %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
321  store <2 x double> %shuffle.i5, ptr %out, align 8
322  ret void
323}
324
; Floating-point variant of the 64-bit odd-element deinterleave:
; <4 x double> with mask <1,3> stored as <2 x double>. +v expects a
; vslidedown.vi by 2 merged with a masked vrgather.vi; zve32f expects
; scalar ld/sd of the doublewords at byte offsets 8 and 24.
325define void @vnsrl_64_double(ptr %in, ptr %out) {
326; V-LABEL: vnsrl_64_double:
327; V:       # %bb.0: # %entry
328; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
329; V-NEXT:    vle64.v v8, (a0)
330; V-NEXT:    vmv.v.i v0, 1
331; V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
332; V-NEXT:    vslidedown.vi v9, v8, 2
333; V-NEXT:    vrgather.vi v9, v8, 1, v0.t
334; V-NEXT:    vse64.v v9, (a1)
335; V-NEXT:    ret
336;
337; ZVE32F-LABEL: vnsrl_64_double:
338; ZVE32F:       # %bb.0: # %entry
339; ZVE32F-NEXT:    ld a2, 8(a0)
340; ZVE32F-NEXT:    ld a0, 24(a0)
341; ZVE32F-NEXT:    sd a2, 0(a1)
342; ZVE32F-NEXT:    sd a0, 8(a1)
343; ZVE32F-NEXT:    ret
344entry:
345  %0 = load <4 x double>, ptr %in, align 8
346  %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
347  store <2 x double> %shuffle.i5, ptr %out, align 8
348  ret void
349}
350
; Even-element deinterleave of <16 x i8> where the last two mask elements
; are undef (<0,2,4,6,8,10,undef,undef>). Both llc configurations still
; expect a single vnsrl.wi with shift amount 0.
351define void @vnsrl_0_i8_undef(ptr %in, ptr %out) {
352; CHECK-LABEL: vnsrl_0_i8_undef:
353; CHECK:       # %bb.0: # %entry
354; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
355; CHECK-NEXT:    vle8.v v8, (a0)
356; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
357; CHECK-NEXT:    vnsrl.wi v8, v8, 0
358; CHECK-NEXT:    vse8.v v8, (a1)
359; CHECK-NEXT:    ret
360entry:
361  %0 = load <16 x i8>, ptr %in, align 1
362  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 undef>
363  store <8 x i8> %shuffle.i5, ptr %out, align 1
364  ret void
365}
366
; Even-element deinterleave of <16 x i8> with undef mask elements in the
; middle (<0,2,undef,6,undef,10,12,14>). Both llc configurations still
; expect a single vnsrl.wi with shift amount 0.
367define void @vnsrl_0_i8_undef2(ptr %in, ptr %out) {
368; CHECK-LABEL: vnsrl_0_i8_undef2:
369; CHECK:       # %bb.0: # %entry
370; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
371; CHECK-NEXT:    vle8.v v8, (a0)
372; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
373; CHECK-NEXT:    vnsrl.wi v8, v8, 0
374; CHECK-NEXT:    vse8.v v8, (a1)
375; CHECK-NEXT:    ret
376entry:
377  %0 = load <16 x i8>, ptr %in, align 1
378  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14>
379  store <8 x i8> %shuffle.i5, ptr %out, align 1
380  ret void
381}
382
383; TODO: Allow an undef initial element
; The shuffle mask here starts with two undef elements
; (<undef,undef,4,6,6,10,12,14>). The expected output contains no vnsrl;
; it is a vrgather.vv of the low half merged with a masked vrgather.vv of
; the slid-down high half.
; NOTE(review): mask element 4 is 6 rather than 8, so the mask is not a
; stride-2 pattern even when the undefs are ignored -- confirm whether the
; repeated 6 is intentional before relying on this test for the TODO.
384define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) {
385; CHECK-LABEL: vnsrl_0_i8_undef3:
386; CHECK:       # %bb.0: # %entry
387; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
388; CHECK-NEXT:    vle8.v v8, (a0)
389; CHECK-NEXT:    li a0, -32
390; CHECK-NEXT:    vmv.s.x v0, a0
391; CHECK-NEXT:    lui a0, 24640
392; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
393; CHECK-NEXT:    vid.v v9
394; CHECK-NEXT:    addi a0, a0, 6
395; CHECK-NEXT:    vadd.vv v9, v9, v9
396; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
397; CHECK-NEXT:    vmv.v.x v10, a0
398; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
399; CHECK-NEXT:    vadd.vi v9, v9, -8
400; CHECK-NEXT:    vrgather.vv v11, v8, v10
401; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
402; CHECK-NEXT:    vslidedown.vi v8, v8, 8
403; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
404; CHECK-NEXT:    vrgather.vv v11, v8, v9, v0.t
405; CHECK-NEXT:    vse8.v v11, (a1)
406; CHECK-NEXT:    ret
407entry:
408  %0 = load <16 x i8>, ptr %in, align 1
409  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 6, i32 10, i32 12, i32 14>
410  store <8 x i8> %shuffle.i5, ptr %out, align 1
411  ret void
412}
413
414; Not a vnsrl (checking for a prior pattern matching bug)
; The mask <0,2,4,6,8,10,undef,1> follows the even-element pattern until
; its last element, which is 1 instead of 14. The expected output
; therefore contains no vnsrl: it loads an index vector from the constant
; pool (.LCPI17_0) and uses vrgather.vv plus a masked vrgather.vv.
415define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) {
416; CHECK-LABEL: vnsrl_0_i8_undef_negative:
417; CHECK:       # %bb.0: # %entry
418; CHECK-NEXT:    vsetivli zero, 16, e8, mf2, ta, ma
419; CHECK-NEXT:    vle8.v v8, (a0)
420; CHECK-NEXT:    lui a0, %hi(.LCPI17_0)
421; CHECK-NEXT:    addi a0, a0, %lo(.LCPI17_0)
422; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
423; CHECK-NEXT:    vid.v v9
424; CHECK-NEXT:    vle8.v v10, (a0)
425; CHECK-NEXT:    li a0, 48
426; CHECK-NEXT:    vadd.vv v9, v9, v9
427; CHECK-NEXT:    vmv.s.x v0, a0
428; CHECK-NEXT:    vadd.vi v9, v9, -8
429; CHECK-NEXT:    vrgather.vv v11, v8, v10
430; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
431; CHECK-NEXT:    vslidedown.vi v8, v8, 8
432; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
433; CHECK-NEXT:    vrgather.vv v11, v8, v9, v0.t
434; CHECK-NEXT:    vse8.v v11, (a1)
435; CHECK-NEXT:    ret
436entry:
437  %0 = load <16 x i8>, ptr %in, align 1
438  %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 1>
439  store <8 x i8> %shuffle.i5, ptr %out, align 1
440  ret void
441}
442
; Even-element deinterleave of a smaller source: <8 x i8> with mask
; <0,2,4,6> stored as <4 x i8>. Both llc configurations expect vnsrl.wi
; with shift amount 0; the narrowing runs at LMUL mf8 for +v and mf4 for
; zve32f.
443define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
444; V-LABEL: vnsrl_0_i8_single_src:
445; V:       # %bb.0: # %entry
446; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
447; V-NEXT:    vle8.v v8, (a0)
448; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
449; V-NEXT:    vnsrl.wi v8, v8, 0
450; V-NEXT:    vse8.v v8, (a1)
451; V-NEXT:    ret
452;
453; ZVE32F-LABEL: vnsrl_0_i8_single_src:
454; ZVE32F:       # %bb.0: # %entry
455; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
456; ZVE32F-NEXT:    vle8.v v8, (a0)
457; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
458; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
459; ZVE32F-NEXT:    vse8.v v8, (a1)
460; ZVE32F-NEXT:    ret
461entry:
462  %0 = load <8 x i8>, ptr %in, align 1
463  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
464  store <4 x i8> %shuffle.i5, ptr %out, align 1
465  ret void
466}
467
; Odd-element deinterleave of a smaller source: <8 x i8> with mask
; <1,3,5,7> stored as <4 x i8>. Both llc configurations expect vnsrl.wi
; with shift amount 8; the narrowing runs at LMUL mf8 for +v and mf4 for
; zve32f.
468define void @vnsrl_8_i8_single_src(ptr %in, ptr %out) {
469; V-LABEL: vnsrl_8_i8_single_src:
470; V:       # %bb.0: # %entry
471; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
472; V-NEXT:    vle8.v v8, (a0)
473; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
474; V-NEXT:    vnsrl.wi v8, v8, 8
475; V-NEXT:    vse8.v v8, (a1)
476; V-NEXT:    ret
477;
478; ZVE32F-LABEL: vnsrl_8_i8_single_src:
479; ZVE32F:       # %bb.0: # %entry
480; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
481; ZVE32F-NEXT:    vle8.v v8, (a0)
482; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
483; ZVE32F-NEXT:    vnsrl.wi v8, v8, 8
484; ZVE32F-NEXT:    vse8.v v8, (a1)
485; ZVE32F-NEXT:    ret
486entry:
487  %0 = load <8 x i8>, ptr %in, align 1
488  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
489  store <4 x i8> %shuffle.i5, ptr %out, align 1
490  ret void
491}
492
; The shuffle result has the same width as the source here: <8 x i8> with
; mask <0,2,4,6,undef,undef,undef,undef>, stored as a full <8 x i8>. Both
; llc configurations expect vnsrl.wi with shift amount 0 at VL=4, followed
; by a vsetivli back to VL=8 for the store.
493define void @vnsrl_0_i8_single_wideuse(ptr %in, ptr %out) {
494; V-LABEL: vnsrl_0_i8_single_wideuse:
495; V:       # %bb.0: # %entry
496; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
497; V-NEXT:    vle8.v v8, (a0)
498; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
499; V-NEXT:    vnsrl.wi v8, v8, 0
500; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
501; V-NEXT:    vse8.v v8, (a1)
502; V-NEXT:    ret
503;
504; ZVE32F-LABEL: vnsrl_0_i8_single_wideuse:
505; ZVE32F:       # %bb.0: # %entry
506; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
507; ZVE32F-NEXT:    vle8.v v8, (a0)
508; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
509; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
510; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
511; ZVE32F-NEXT:    vse8.v v8, (a1)
512; ZVE32F-NEXT:    ret
513entry:
514  %0 = load <8 x i8>, ptr %in, align 1
515  %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
516  store <8 x i8> %shuffle.i5, ptr %out, align 1
517  ret void
518}
519
520; Can't match the m8 result type as the source would have to be m16 which
521; isn't a legal type.
522define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
523; V-LABEL: vnsrl_0_i32_single_src_m8:
524; V:       # %bb.0: # %entry
525; V-NEXT:    li a2, 64
526; V-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
527; V-NEXT:    vle32.v v8, (a0)
528; V-NEXT:    lui a0, 341
529; V-NEXT:    addiw a0, a0, 1365
530; V-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
531; V-NEXT:    vmv.s.x v16, a0
532; V-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
533; V-NEXT:    vcompress.vm v24, v8, v16
534; V-NEXT:    vse32.v v24, (a1)
535; V-NEXT:    ret
536;
537; ZVE32F-LABEL: vnsrl_0_i32_single_src_m8:
538; ZVE32F:       # %bb.0: # %entry
539; ZVE32F-NEXT:    li a2, 64
540; ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
541; ZVE32F-NEXT:    vle32.v v8, (a0)
542; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
543; ZVE32F-NEXT:    vmv.v.i v16, 0
544; ZVE32F-NEXT:    lui a0, 341
545; ZVE32F-NEXT:    addi a0, a0, 1365
546; ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
547; ZVE32F-NEXT:    vmv.s.x v16, a0
548; ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
549; ZVE32F-NEXT:    vcompress.vm v24, v8, v16
550; ZVE32F-NEXT:    vse32.v v24, (a1)
551; ZVE32F-NEXT:    ret
552entry:
553  %0 = load <64 x i32>, ptr %in, align 4
554  %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
555  store <64 x i32> %shuffle.i5, ptr %out, align 4
556  ret void
557}
558