; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
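; Tests lowering of the llvm.experimental.vp.reverse intrinsic for scalable
; integer vectors at LMUL 1 through 8. Most cases expect a reversed index
; vector built with vid.v + vrsub.vx and applied via vrgather.vv (or
; vrgatherei16.vv for i8 elements); nxv64i8 additionally uses vslidedown.vx,
; and nxv128i8 falls back to strided stores and loads through the stack.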

define <vscale x 1 x i64> @test_vp_reverse_nxv1i64_masked(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 %evl)
  ret <vscale x 1 x i64> %dst
}

define <vscale x 1 x i64> @test_vp_reverse_nxv1i64(<vscale x 1 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 1 x i64> %dst
}

define <vscale x 2 x i32> @test_vp_reverse_nxv2i32_masked(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i32> %dst
}

define <vscale x 2 x i32> @test_vp_reverse_nxv2i32(<vscale x 2 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i32> %dst
}

define <vscale x 4 x i16> @test_vp_reverse_nxv4i16_masked(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i16> %dst
}

define <vscale x 4 x i16> @test_vp_reverse_nxv4i16(<vscale x 4 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i16> %dst
}

define <vscale x 8 x i8> @test_vp_reverse_nxv8i8_masked(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i8> %dst
}

define <vscale x 8 x i8> @test_vp_reverse_nxv8i8(<vscale x 8 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v10, v10, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i8> %dst
}

define <vscale x 2 x i64> @test_vp_reverse_nxv2i64_masked(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i64> %dst
}

define <vscale x 2 x i64> @test_vp_reverse_nxv2i64(<vscale x 2 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i64> %dst
}

define <vscale x 4 x i32> @test_vp_reverse_nxv4i32_masked(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i32> %dst
}

define <vscale x 4 x i32> @test_vp_reverse_nxv4i32(<vscale x 4 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i32> %dst
}

define <vscale x 8 x i16> @test_vp_reverse_nxv8i16_masked(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i16> %dst
}

define <vscale x 8 x i16> @test_vp_reverse_nxv8i16(<vscale x 8 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i16> %dst
}

define <vscale x 16 x i8> @test_vp_reverse_nxv16i8_masked(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i8> %dst
}

define <vscale x 16 x i8> @test_vp_reverse_nxv16i8(<vscale x 16 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v12, v12, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i8> %dst
}

define <vscale x 4 x i64> @test_vp_reverse_nxv4i64_masked(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i64> %dst
}

define <vscale x 4 x i64> @test_vp_reverse_nxv4i64(<vscale x 4 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i64> %dst
}

define <vscale x 8 x i32> @test_vp_reverse_nxv8i32_masked(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i32> %dst
}

define <vscale x 8 x i32> @test_vp_reverse_nxv8i32(<vscale x 8 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i32> %dst
}

define <vscale x 16 x i16> @test_vp_reverse_nxv16i16_masked(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i16> %dst
}

define <vscale x 16 x i16> @test_vp_reverse_nxv16i16(<vscale x 16 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i16> %dst
}

define <vscale x 32 x i8> @test_vp_reverse_nxv32i8_masked(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i8> %dst
}

define <vscale x 32 x i8> @test_vp_reverse_nxv32i8(<vscale x 32 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v16, v16, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i8> %dst
}

define <vscale x 8 x i64> @test_vp_reverse_nxv8i64_masked(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i64> %dst
}

define <vscale x 8 x i64> @test_vp_reverse_nxv8i64(<vscale x 8 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i64> %dst
}

define <vscale x 16 x i32> @test_vp_reverse_nxv16i32_masked(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i32> %dst
}

define <vscale x 16 x i32> @test_vp_reverse_nxv16i32(<vscale x 16 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i32> %dst
}

define <vscale x 32 x i16> @test_vp_reverse_nxv32i16_masked(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i16_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i16> %dst
}

define <vscale x 32 x i16> @test_vp_reverse_nxv32i16(<vscale x 32 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i16> %dst
}

define <vscale x 64 x i8> @test_vp_reverse_nxv64i8_masked(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i8_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vrsub.vx v24, v16, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v23, v8, v24
; CHECK-NEXT:    vrgatherei16.vv v22, v9, v24
; CHECK-NEXT:    vrgatherei16.vv v21, v10, v24
; CHECK-NEXT:    vrgatherei16.vv v20, v11, v24
; CHECK-NEXT:    vrgatherei16.vv v19, v12, v24
; CHECK-NEXT:    vrgatherei16.vv v18, v13, v24
; CHECK-NEXT:    vrgatherei16.vv v17, v14, v24
; CHECK-NEXT:    vrgatherei16.vv v16, v15, v24
; CHECK-NEXT:    sub a1, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a1, v0.t
; CHECK-NEXT:    ret
  %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 %evl)
  ret <vscale x 64 x i8> %dst
}

define <vscale x 64 x i8> @test_vp_reverse_nxv64i8(<vscale x 64 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vrsub.vx v24, v16, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v23, v8, v24
; CHECK-NEXT:    vrgatherei16.vv v22, v9, v24
; CHECK-NEXT:    vrgatherei16.vv v21, v10, v24
; CHECK-NEXT:    vrgatherei16.vv v20, v11, v24
; CHECK-NEXT:    vrgatherei16.vv v19, v12, v24
; CHECK-NEXT:    vrgatherei16.vv v18, v13, v24
; CHECK-NEXT:    vrgatherei16.vv v17, v14, v24
; CHECK-NEXT:    vrgatherei16.vv v16, v15, v24
; CHECK-NEXT:    sub a1, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a1
; CHECK-NEXT:    ret

  %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 64 x i8> %dst
}

define <vscale x 128 x i8> @test_vp_reverse_nxv128i8(<vscale x 128 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv128i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 3
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB32_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a1, a2
; CHECK-NEXT:  .LBB32_2:
; CHECK-NEXT:    addi sp, sp, -80
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset ra, -8
; CHECK-NEXT:    .cfi_offset s0, -16
; CHECK-NEXT:    addi s0, sp, 80
; CHECK-NEXT:    .cfi_def_cfa s0, 0
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 4
; CHECK-NEXT:    sub sp, sp, a3
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    addi a3, sp, 64
; CHECK-NEXT:    li a4, -1
; CHECK-NEXT:    sub a5, a0, a2
; CHECK-NEXT:    add a6, a0, a3
; CHECK-NEXT:    sltu a0, a0, a5
; CHECK-NEXT:    add a2, a3, a2
; CHECK-NEXT:    addi a6, a6, -1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a6), a4
; CHECK-NEXT:    sub a6, a6, a1
; CHECK-NEXT:    and a0, a0, a5
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vsse8.v v16, (a6), a4
; CHECK-NEXT:    vle8.v v16, (a2)
; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a3)
; CHECK-NEXT:    addi sp, s0, -80
; CHECK-NEXT:    .cfi_def_cfa sp, 80
; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT:    .cfi_restore ra
; CHECK-NEXT:    .cfi_restore s0
; CHECK-NEXT:    addi sp, sp, 80
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8> %src, <vscale x 128 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 128 x i8> %dst
}

; LMUL = 1
declare <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64>,<vscale x 1 x i1>,i32)
declare <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8>,<vscale x 8 x i1>,i32)

; LMUL = 2
declare <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i1>,i32)

; LMUL = 4
declare <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8>,<vscale x 32 x i1>,i32)

; LMUL = 8
declare <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16>,<vscale x 32 x i1>,i32)
declare <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8>,<vscale x 64 x i1>,i32)

declare <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8>,<vscale x 128 x i1>,i32)