; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

declare <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8>, <2 x i1>, i32)

; Masked VP bitreverse of <2 x i8> (SEW=8, LMUL=mf8): nibble swap, then 2-bit
; pairs (mask 51 = 0b00110011) and single bits (mask 85 = 0b01010101), every
; op predicated with v0.t.
define <2 x i8> @vp_bitreverse_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}
; Unmasked VP bitreverse of <2 x i8>: same nibble/2-bit/1-bit swap sequence
; without v0.t; the final 1-bit shift is emitted as vadd.vv v8, v8, v8.
define <2 x i8> @vp_bitreverse_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8>, <4 x i1>, i32)

; Masked VP bitreverse of <4 x i8> (LMUL=mf4): nibble swap, then 2-bit
; (mask 51) and 1-bit (mask 85) swaps under v0.t.
define <4 x i8> @vp_bitreverse_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}
; Unmasked VP bitreverse of <4 x i8>; final 1-bit shift is vadd.vv v8, v8, v8.
define <4 x i8> @vp_bitreverse_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8>, <8 x i1>, i32)

; Masked VP bitreverse of <8 x i8> (LMUL=mf2): nibble swap, then 2-bit
; (mask 51) and 1-bit (mask 85) swaps under v0.t.
define <8 x i8> @vp_bitreverse_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}
; Unmasked VP bitreverse of <8 x i8>; final 1-bit shift is vadd.vv v8, v8, v8.
define <8 x i8> @vp_bitreverse_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8>, <16 x i1>, i32)

; Masked VP bitreverse of <16 x i8> (LMUL=m1): nibble swap, then 2-bit
; (mask 51) and 1-bit (mask 85) swaps under v0.t.
define <16 x i8> @vp_bitreverse_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}
; Unmasked VP bitreverse of <16 x i8>; final 1-bit shift is vadd.vv v8, v8, v8.
define <16 x i8> @vp_bitreverse_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

declare <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16>, <2 x i1>, i32)

; Masked VP bitreverse of <2 x i16> (SEW=16, LMUL=mf4): byte swap via
; vsrl/vsll by 8, then 4/2/1-bit swaps with masks 0x0F0F (lui 1, addi -241),
; 0x3333 (lui 3, addi 819) and 0x5555 (lui 5, addi 1365), all under v0.t.
define <2 x i16> @vp_bitreverse_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}
; Unmasked VP bitreverse of <2 x i16>: byte swap then 4/2/1-bit swaps with
; masks 0x0F0F/0x3333/0x5555; final 1-bit shift is vadd.vv v8, v8, v8.
define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16>, <4 x i1>, i32)

; Masked VP bitreverse of <4 x i16> (LMUL=mf2): byte swap then 4/2/1-bit
; swaps with masks 0x0F0F/0x3333/0x5555 under v0.t.
define <4 x i16> @vp_bitreverse_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}
; Unmasked VP bitreverse of <4 x i16>; final 1-bit shift is vadd.vv v8, v8, v8.
define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16>, <8 x i1>, i32)

; Masked VP bitreverse of <8 x i16> (LMUL=m1): byte swap then 4/2/1-bit
; swaps with masks 0x0F0F/0x3333/0x5555 under v0.t.
define <8 x i16> @vp_bitreverse_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}
; Unmasked VP bitreverse of <8 x i16>; final 1-bit shift is vadd.vv v8, v8, v8.
define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16>, <16 x i1>, i32)

; Masked VP bitreverse of <16 x i16> (LMUL=m2, scratch in v10): byte swap
; then 4/2/1-bit swaps with masks 0x0F0F/0x3333/0x5555 under v0.t.
define <16 x i16> @vp_bitreverse_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}
; Unmasked VP bitreverse of <16 x i16> (LMUL=m2, scratch in v10); final
; 1-bit shift is vadd.vv v8, v8, v8.
define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

declare <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32>, <2 x i1>, i32)

; Masked VP bitreverse of <2 x i32> (SEW=32, LMUL=mf2): byte swap built from
; shifts by 8/24 and mask 0xFF00 (lui 16, addi -256), then 4/2/1-bit swaps
; with masks 0x0F0F0F0F (lui 61681), 0x33333333 (lui 209715) and
; 0x55555555 (lui 349525), all under v0.t.
define <2 x i32> @vp_bitreverse_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}
; Unmasked VP bitreverse of <2 x i32>: byte swap (mask 0xFF00) then 4/2/1-bit
; swaps with masks 0x0F0F0F0F/0x33333333/0x55555555; final 1-bit shift is
; vadd.vv v8, v8, v8.
define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32>, <4 x i1>, i32)

; Masked VP bitreverse of <4 x i32> (LMUL=m1): byte swap (mask 0xFF00), then
; 4/2/1-bit swaps with masks 0x0F0F0F0F/0x33333333/0x55555555 under v0.t.
define <4 x i32> @vp_bitreverse_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}
; Unmasked VP bitreverse of <4 x i32>; final 1-bit shift is vadd.vv v8, v8, v8.
define <4 x i32> @vp_bitreverse_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32>, <8 x i1>, i32)

; Masked VP bitreverse of <8 x i32> (LMUL=m2, scratch in v10/v12): byte swap
; (mask 0xFF00), then 4/2/1-bit swaps with masks
; 0x0F0F0F0F/0x33333333/0x55555555 under v0.t.
define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}
; Unmasked VP bitreverse of <8 x i32> (LMUL=m2, scratch in v10/v12); final
; 1-bit shift is vadd.vv v8, v8, v8.
define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vsll.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32>, <16 x i1>, i32)

; Masked VP bitreverse of <16 x i32> (LMUL=m4, scratch in v12/v16): byte swap
; (mask 0xFF00), then 4/2/1-bit swaps with masks
; 0x0F0F0F0F/0x33333333/0x55555555 under v0.t.
define <16 x i32> @vp_bitreverse_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}
; Unmasked VP bitreverse of <16 x i32> (LMUL=m4, scratch in v12/v16); final
; 1-bit shift is vadd.vv v8, v8, v8.
define <16 x i32> @vp_bitreverse_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vsll.vi v16, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}
830
831declare <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64>, <2 x i1>, i32)
832
; Masked vp.bitreverse on <2 x i64> (e64, m1). The expansion is a full
; 8-byte swap (shifts by 56/40/24/8 with masks 0xFF00, 0xFF0000
; (lui 4080 is 0xFF000000 pre-shift) and the 0x00000000FF000000-pattern)
; followed by the nibble/bit-pair/bit swaps. Checks diverge per target:
; * RV32 has no 64-bit scalar registers, so one 64-bit byte mask is built
;   on the stack (lui 1044480 = 0xFF000000 stored to 8(sp), zero to
;   12(sp)) and broadcast with a stride-0 vlse64.v; the 0x0F0F.../0x3333.../
;   0x5555... masks are splatted through e32 vmv.v.x under temporary
;   vsetivli switches, then used via vand.vv.
; * RV64 materializes each 64-bit mask in a scalar register with
;   lui/addiw + slli/add and applies it via vand.vx/vsll.vx/vsrl.vx.
; All vector ops remain predicated on %m (v0.t).
define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a4, 1044480
; RV32-NEXT:    li a3, 56
; RV32-NEXT:    lui a5, 16
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a1, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a4, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v9, (a6), zero
; RV32-NEXT:    lui a4, 61681
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsll.vx v10, v8, a3, v0.t
; RV32-NEXT:    addi a5, a5, -256
; RV32-NEXT:    vand.vx v11, v8, a5, v0.t
; RV32-NEXT:    vsll.vx v11, v11, a2, v0.t
; RV32-NEXT:    vor.vv v10, v10, v11, v0.t
; RV32-NEXT:    vand.vx v11, v8, a1, v0.t
; RV32-NEXT:    vsll.vi v11, v11, 24, v0.t
; RV32-NEXT:    vand.vv v12, v8, v9, v0.t
; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
; RV32-NEXT:    vor.vv v11, v11, v12, v0.t
; RV32-NEXT:    vor.vv v10, v10, v11, v0.t
; RV32-NEXT:    vsrl.vx v11, v8, a3, v0.t
; RV32-NEXT:    lui a3, 209715
; RV32-NEXT:    vsrl.vx v12, v8, a2, v0.t
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a4, a4, -241
; RV32-NEXT:    addi a3, a3, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vand.vx v12, v12, a5, v0.t
; RV32-NEXT:    vor.vv v11, v12, v11, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT:    vand.vx v12, v12, a1, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a4
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v12, a3
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v11, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v11, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vand.vv v10, v10, v9, v0.t
; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
; RV32-NEXT:    vand.vv v9, v9, v12, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    vand.vv v9, v9, v11, v0.t
; RV32-NEXT:    vand.vv v8, v8, v11, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a3, 255
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 209715
; RV64-NEXT:    lui a7, 349525
; RV64-NEXT:    addiw a5, a5, -241
; RV64-NEXT:    addiw a6, a6, 819
; RV64-NEXT:    addiw a7, a7, 1365
; RV64-NEXT:    slli t0, a5, 32
; RV64-NEXT:    add t0, a5, t0
; RV64-NEXT:    slli a5, a6, 32
; RV64-NEXT:    add a6, a6, a5
; RV64-NEXT:    slli a5, a7, 32
; RV64-NEXT:    add a5, a7, a5
; RV64-NEXT:    li a7, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    slli a3, a3, 24
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsll.vi v9, v9, 24, v0.t
; RV64-NEXT:    vand.vx v10, v8, a3, v0.t
; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
; RV64-NEXT:    vsll.vx v10, v8, a2, v0.t
; RV64-NEXT:    vand.vx v11, v8, a0, v0.t
; RV64-NEXT:    vsll.vx v11, v11, a7, v0.t
; RV64-NEXT:    vor.vv v10, v10, v11, v0.t
; RV64-NEXT:    vor.vv v9, v10, v9, v0.t
; RV64-NEXT:    vsrl.vx v10, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v11, v8, a7, v0.t
; RV64-NEXT:    vand.vx v11, v11, a0, v0.t
; RV64-NEXT:    vor.vv v10, v11, v10, v0.t
; RV64-NEXT:    vsrl.vi v11, v8, 24, v0.t
; RV64-NEXT:    vand.vx v11, v11, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    vor.vv v8, v8, v11, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vand.vx v9, v9, t0, v0.t
; RV64-NEXT:    vand.vx v8, v8, t0, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
; RV64-NEXT:    vand.vx v9, v9, a6, v0.t
; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a5, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}
967
; Unmasked <2 x i64> variant: same 8-byte swap + bit-group swaps as the
; masked version but with no v0.t predication, which lets the scheduler
; interleave the scalar mask materialization with the vector ops more
; freely. The final shift-left-by-one is emitted as vadd.vv v8, v8, v8.
define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    lui a5, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsll.vx v10, v8, a2
; RV32-NEXT:    addi a1, a3, -256
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    vsrl.vx v12, v8, a4
; RV32-NEXT:    vand.vx v13, v8, a1
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v12, (a6), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsll.vx v13, v13, a4
; RV32-NEXT:    vor.vv v10, v10, v13
; RV32-NEXT:    vsrl.vi v13, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a5
; RV32-NEXT:    vand.vv v13, v13, v12
; RV32-NEXT:    vor.vv v9, v13, v9
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    lui a3, 349525
; RV32-NEXT:    vand.vv v12, v8, v12
; RV32-NEXT:    vand.vx v8, v8, a5
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    addi a3, a3, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v9, v9, v11
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v11, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v9, v9, v12
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    li a5, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsrl.vx v11, v8, a3
; RV64-NEXT:    vsrl.vx v12, v8, a5
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vor.vv v11, v12, v11
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    slli a2, a2, 24
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vsll.vi v12, v12, 24
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vand.vx v10, v8, a2
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vx v12, v8, a3
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a5
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    slli a3, a0, 32
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    vor.vv v9, v9, v11
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v9, v9, a2
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}
1103
1104declare <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64>, <4 x i1>, i32)
1105
; Masked vp.bitreverse on <4 x i64>: identical algorithm to the v2i64
; case but at LMUL=m2, so vector temporaries use even register pairs
; (v10/v12/v14/v16) and the RV32 e32 splats use vsetivli with AVL 8.
define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a4, 1044480
; RV32-NEXT:    li a3, 56
; RV32-NEXT:    lui a5, 16
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a1, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a4, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v10, (a6), zero
; RV32-NEXT:    lui a4, 61681
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsll.vx v12, v8, a3, v0.t
; RV32-NEXT:    addi a5, a5, -256
; RV32-NEXT:    vand.vx v14, v8, a5, v0.t
; RV32-NEXT:    vsll.vx v14, v14, a2, v0.t
; RV32-NEXT:    vor.vv v12, v12, v14, v0.t
; RV32-NEXT:    vand.vx v14, v8, a1, v0.t
; RV32-NEXT:    vsll.vi v14, v14, 24, v0.t
; RV32-NEXT:    vand.vv v16, v8, v10, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
; RV32-NEXT:    vor.vv v14, v14, v16, v0.t
; RV32-NEXT:    vor.vv v12, v12, v14, v0.t
; RV32-NEXT:    vsrl.vx v14, v8, a3, v0.t
; RV32-NEXT:    lui a3, 209715
; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a4, a4, -241
; RV32-NEXT:    addi a3, a3, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vand.vx v16, v16, a5, v0.t
; RV32-NEXT:    vor.vv v14, v16, v14, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
; RV32-NEXT:    vand.vx v16, v16, a1, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v16, a3
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v14, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v14, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT:    vand.vv v12, v12, v10, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
; RV32-NEXT:    vand.vv v10, v10, v16, v0.t
; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV32-NEXT:    vand.vv v10, v10, v14, v0.t
; RV32-NEXT:    vand.vv v8, v8, v14, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a3, 255
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 209715
; RV64-NEXT:    lui a7, 349525
; RV64-NEXT:    addiw a5, a5, -241
; RV64-NEXT:    addiw a6, a6, 819
; RV64-NEXT:    addiw a7, a7, 1365
; RV64-NEXT:    slli t0, a5, 32
; RV64-NEXT:    add t0, a5, t0
; RV64-NEXT:    slli a5, a6, 32
; RV64-NEXT:    add a6, a6, a5
; RV64-NEXT:    slli a5, a7, 32
; RV64-NEXT:    add a5, a7, a5
; RV64-NEXT:    li a7, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    slli a3, a3, 24
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsll.vi v10, v10, 24, v0.t
; RV64-NEXT:    vand.vx v12, v8, a3, v0.t
; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
; RV64-NEXT:    vsll.vx v12, v8, a2, v0.t
; RV64-NEXT:    vand.vx v14, v8, a0, v0.t
; RV64-NEXT:    vsll.vx v14, v14, a7, v0.t
; RV64-NEXT:    vor.vv v12, v12, v14, v0.t
; RV64-NEXT:    vor.vv v10, v12, v10, v0.t
; RV64-NEXT:    vsrl.vx v12, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v14, v8, a7, v0.t
; RV64-NEXT:    vand.vx v14, v14, a0, v0.t
; RV64-NEXT:    vor.vv v12, v14, v12, v0.t
; RV64-NEXT:    vsrl.vi v14, v8, 24, v0.t
; RV64-NEXT:    vand.vx v14, v14, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    vor.vv v8, v8, v14, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vand.vx v10, v10, t0, v0.t
; RV64-NEXT:    vand.vx v8, v8, t0, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
; RV64-NEXT:    vand.vx v10, v10, a6, v0.t
; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a5, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}
1240
; Unmasked <4 x i64> variant at LMUL=m2: same structure as the unmasked
; v2i64 case (stack-built 64-bit byte mask on RV32, scalar 64-bit masks
; on RV64), with the final shift-left-by-one emitted as
; vadd.vv v8, v8, v8.
define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    li a4, 40
; RV32-NEXT:    lui a5, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v14, v8, 24
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsll.vx v12, v8, a2
; RV32-NEXT:    addi a1, a3, -256
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    vsrl.vx v16, v8, a4
; RV32-NEXT:    vand.vx v18, v8, a1
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    vor.vv v10, v16, v10
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v16, (a6), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsll.vx v18, v18, a4
; RV32-NEXT:    vor.vv v12, v12, v18
; RV32-NEXT:    vsrl.vi v18, v8, 8
; RV32-NEXT:    vand.vx v14, v14, a5
; RV32-NEXT:    vand.vv v18, v18, v16
; RV32-NEXT:    vor.vv v14, v18, v14
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    lui a3, 349525
; RV32-NEXT:    vand.vv v16, v8, v16
; RV32-NEXT:    vand.vx v8, v8, a5
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    addi a3, a3, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v10, v14, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v14, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a3
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v10, v10, v16
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vand.vv v10, v10, v14
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    li a5, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsrl.vx v10, v8, a3
; RV64-NEXT:    vsrl.vx v16, v8, a5
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vor.vv v10, v16, v10
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    slli a2, a2, 24
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vand.vx v14, v8, a2
; RV64-NEXT:    vsll.vi v14, v14, 8
; RV64-NEXT:    vor.vv v14, v16, v14
; RV64-NEXT:    vsll.vx v16, v8, a3
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a5
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    slli a3, a0, 32
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vor.vv v8, v8, v14
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}
1376
1377declare <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64>, <8 x i1>, i32)
1378
; Masked vp.bitreverse on <8 x i64> at LMUL=m4: same algorithm as the
; smaller i64 cases, but register pressure is higher (temporaries span
; v12-v28 in groups of four), so on RV32 the tail of the sequence rotates
; results through different register groups rather than keeping
; everything in v8.
define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a4, 1044480
; RV32-NEXT:    li a3, 56
; RV32-NEXT:    lui a5, 16
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a1, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a4, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a6), zero
; RV32-NEXT:    lui a4, 61681
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsll.vx v16, v8, a3, v0.t
; RV32-NEXT:    addi a5, a5, -256
; RV32-NEXT:    vand.vx v20, v8, a5, v0.t
; RV32-NEXT:    vsll.vx v20, v20, a2, v0.t
; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
; RV32-NEXT:    vand.vx v20, v8, a1, v0.t
; RV32-NEXT:    vsll.vi v20, v20, 24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v12, v0.t
; RV32-NEXT:    vsll.vi v24, v24, 8, v0.t
; RV32-NEXT:    vor.vv v20, v20, v24, v0.t
; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
; RV32-NEXT:    vsrl.vx v20, v8, a3, v0.t
; RV32-NEXT:    lui a3, 209715
; RV32-NEXT:    vsrl.vx v24, v8, a2, v0.t
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a4, a4, -241
; RV32-NEXT:    addi a3, a3, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vand.vx v24, v24, a5, v0.t
; RV32-NEXT:    vor.vv v20, v24, v20, v0.t
; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT:    vand.vx v24, v24, a1, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v28, a4
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a3
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v20, v8, v20, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
; RV32-NEXT:    vsrl.vi v20, v16, 4, v0.t
; RV32-NEXT:    vand.vv v20, v20, v28, v0.t
; RV32-NEXT:    vand.vv v16, v16, v28, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 4, v0.t
; RV32-NEXT:    vor.vv v16, v20, v16, v0.t
; RV32-NEXT:    vsrl.vi v20, v16, 2, v0.t
; RV32-NEXT:    vand.vv v20, v20, v12, v0.t
; RV32-NEXT:    vand.vv v12, v16, v12, v0.t
; RV32-NEXT:    vsll.vi v12, v12, 2, v0.t
; RV32-NEXT:    vor.vv v12, v20, v12, v0.t
; RV32-NEXT:    vsrl.vi v16, v12, 1, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a3, 255
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    lui a5, 61681
; RV64-NEXT:    lui a6, 209715
; RV64-NEXT:    lui a7, 349525
; RV64-NEXT:    addiw a5, a5, -241
; RV64-NEXT:    addiw a6, a6, 819
; RV64-NEXT:    addiw a7, a7, 1365
; RV64-NEXT:    slli t0, a5, 32
; RV64-NEXT:    add t0, a5, t0
; RV64-NEXT:    slli a5, a6, 32
; RV64-NEXT:    add a6, a6, a5
; RV64-NEXT:    slli a5, a7, 32
; RV64-NEXT:    add a5, a7, a5
; RV64-NEXT:    li a7, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
; RV64-NEXT:    slli a3, a3, 24
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsll.vi v12, v12, 24, v0.t
; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
; RV64-NEXT:    vsll.vx v16, v8, a2, v0.t
; RV64-NEXT:    vand.vx v20, v8, a0, v0.t
; RV64-NEXT:    vsll.vx v20, v20, a7, v0.t
; RV64-NEXT:    vor.vv v16, v16, v20, v0.t
; RV64-NEXT:    vor.vv v12, v16, v12, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
; RV64-NEXT:    vsrl.vx v20, v8, a7, v0.t
; RV64-NEXT:    vand.vx v20, v20, a0, v0.t
; RV64-NEXT:    vor.vv v16, v20, v16, v0.t
; RV64-NEXT:    vsrl.vi v20, v8, 24, v0.t
; RV64-NEXT:    vand.vx v20, v20, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
; RV64-NEXT:    vor.vv v8, v8, v20, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vand.vx v12, v12, t0, v0.t
; RV64-NEXT:    vand.vx v8, v8, t0, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
; RV64-NEXT:    vand.vx v12, v12, a6, v0.t
; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a5, v0.t
; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}
1513
1514define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
1515; RV32-LABEL: vp_bitreverse_v8i64_unmasked:
1516; RV32:       # %bb.0:
1517; RV32-NEXT:    addi sp, sp, -16
1518; RV32-NEXT:    .cfi_def_cfa_offset 16
1519; RV32-NEXT:    lui a1, 1044480
1520; RV32-NEXT:    li a2, 56
1521; RV32-NEXT:    lui a3, 16
1522; RV32-NEXT:    li a4, 40
1523; RV32-NEXT:    lui a5, 4080
1524; RV32-NEXT:    addi a6, sp, 8
1525; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1526; RV32-NEXT:    vsrl.vi v20, v8, 24
1527; RV32-NEXT:    sw a1, 8(sp)
1528; RV32-NEXT:    sw zero, 12(sp)
1529; RV32-NEXT:    vsll.vx v16, v8, a2
1530; RV32-NEXT:    addi a1, a3, -256
1531; RV32-NEXT:    vsrl.vx v12, v8, a2
1532; RV32-NEXT:    vsrl.vx v24, v8, a4
1533; RV32-NEXT:    vand.vx v28, v8, a1
1534; RV32-NEXT:    vand.vx v24, v24, a1
1535; RV32-NEXT:    vor.vv v12, v24, v12
1536; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1537; RV32-NEXT:    vlse64.v v24, (a6), zero
1538; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1539; RV32-NEXT:    vsll.vx v28, v28, a4
1540; RV32-NEXT:    vor.vv v16, v16, v28
1541; RV32-NEXT:    vsrl.vi v28, v8, 8
1542; RV32-NEXT:    vand.vx v20, v20, a5
1543; RV32-NEXT:    vand.vv v28, v28, v24
1544; RV32-NEXT:    vor.vv v20, v28, v20
1545; RV32-NEXT:    lui a1, 61681
1546; RV32-NEXT:    lui a2, 209715
1547; RV32-NEXT:    lui a3, 349525
1548; RV32-NEXT:    vand.vv v24, v8, v24
1549; RV32-NEXT:    vand.vx v8, v8, a5
1550; RV32-NEXT:    addi a1, a1, -241
1551; RV32-NEXT:    addi a2, a2, 819
1552; RV32-NEXT:    addi a3, a3, 1365
1553; RV32-NEXT:    vsll.vi v8, v8, 24
1554; RV32-NEXT:    vsll.vi v24, v24, 8
1555; RV32-NEXT:    vor.vv v8, v8, v24
1556; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1557; RV32-NEXT:    vmv.v.x v24, a1
1558; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1559; RV32-NEXT:    vor.vv v12, v20, v12
1560; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1561; RV32-NEXT:    vmv.v.x v20, a2
1562; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1563; RV32-NEXT:    vor.vv v8, v16, v8
1564; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
1565; RV32-NEXT:    vmv.v.x v16, a3
1566; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1567; RV32-NEXT:    vor.vv v8, v8, v12
1568; RV32-NEXT:    vsrl.vi v12, v8, 4
1569; RV32-NEXT:    vand.vv v8, v8, v24
1570; RV32-NEXT:    vand.vv v12, v12, v24
1571; RV32-NEXT:    vsll.vi v8, v8, 4
1572; RV32-NEXT:    vor.vv v8, v12, v8
1573; RV32-NEXT:    vsrl.vi v12, v8, 2
1574; RV32-NEXT:    vand.vv v8, v8, v20
1575; RV32-NEXT:    vand.vv v12, v12, v20
1576; RV32-NEXT:    vsll.vi v8, v8, 2
1577; RV32-NEXT:    vor.vv v8, v12, v8
1578; RV32-NEXT:    vsrl.vi v12, v8, 1
1579; RV32-NEXT:    vand.vv v8, v8, v16
1580; RV32-NEXT:    vand.vv v12, v12, v16
1581; RV32-NEXT:    vadd.vv v8, v8, v8
1582; RV32-NEXT:    vor.vv v8, v12, v8
1583; RV32-NEXT:    addi sp, sp, 16
1584; RV32-NEXT:    .cfi_def_cfa_offset 0
1585; RV32-NEXT:    ret
1586;
1587; RV64-LABEL: vp_bitreverse_v8i64_unmasked:
1588; RV64:       # %bb.0:
1589; RV64-NEXT:    lui a1, 4080
1590; RV64-NEXT:    li a2, 255
1591; RV64-NEXT:    li a3, 56
1592; RV64-NEXT:    lui a4, 16
1593; RV64-NEXT:    li a5, 40
1594; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
1595; RV64-NEXT:    vsrl.vi v16, v8, 24
1596; RV64-NEXT:    vsrl.vi v20, v8, 8
1597; RV64-NEXT:    addiw a0, a4, -256
1598; RV64-NEXT:    vsrl.vx v12, v8, a3
1599; RV64-NEXT:    vsrl.vx v24, v8, a5
1600; RV64-NEXT:    vand.vx v24, v24, a0
1601; RV64-NEXT:    vor.vv v12, v24, v12
1602; RV64-NEXT:    vand.vx v24, v8, a1
1603; RV64-NEXT:    slli a2, a2, 24
1604; RV64-NEXT:    vand.vx v16, v16, a1
1605; RV64-NEXT:    vsll.vi v24, v24, 24
1606; RV64-NEXT:    vand.vx v20, v20, a2
1607; RV64-NEXT:    vor.vv v16, v20, v16
1608; RV64-NEXT:    vand.vx v20, v8, a2
1609; RV64-NEXT:    vsll.vi v20, v20, 8
1610; RV64-NEXT:    vor.vv v20, v24, v20
1611; RV64-NEXT:    vsll.vx v24, v8, a3
1612; RV64-NEXT:    vand.vx v8, v8, a0
1613; RV64-NEXT:    vsll.vx v8, v8, a5
1614; RV64-NEXT:    vor.vv v8, v24, v8
1615; RV64-NEXT:    lui a0, 61681
1616; RV64-NEXT:    lui a1, 209715
1617; RV64-NEXT:    lui a2, 349525
1618; RV64-NEXT:    addiw a0, a0, -241
1619; RV64-NEXT:    addiw a1, a1, 819
1620; RV64-NEXT:    addiw a2, a2, 1365
1621; RV64-NEXT:    slli a3, a0, 32
1622; RV64-NEXT:    slli a4, a1, 32
1623; RV64-NEXT:    add a0, a0, a3
1624; RV64-NEXT:    slli a3, a2, 32
1625; RV64-NEXT:    add a1, a1, a4
1626; RV64-NEXT:    add a2, a2, a3
1627; RV64-NEXT:    vor.vv v12, v16, v12
1628; RV64-NEXT:    vor.vv v8, v8, v20
1629; RV64-NEXT:    vor.vv v8, v8, v12
1630; RV64-NEXT:    vsrl.vi v12, v8, 4
1631; RV64-NEXT:    vand.vx v8, v8, a0
1632; RV64-NEXT:    vand.vx v12, v12, a0
1633; RV64-NEXT:    vsll.vi v8, v8, 4
1634; RV64-NEXT:    vor.vv v8, v12, v8
1635; RV64-NEXT:    vsrl.vi v12, v8, 2
1636; RV64-NEXT:    vand.vx v8, v8, a1
1637; RV64-NEXT:    vand.vx v12, v12, a1
1638; RV64-NEXT:    vsll.vi v8, v8, 2
1639; RV64-NEXT:    vor.vv v8, v12, v8
1640; RV64-NEXT:    vsrl.vi v12, v8, 1
1641; RV64-NEXT:    vand.vx v8, v8, a2
1642; RV64-NEXT:    vand.vx v12, v12, a2
1643; RV64-NEXT:    vadd.vv v8, v8, v8
1644; RV64-NEXT:    vor.vv v8, v12, v8
1645; RV64-NEXT:    ret
1646  %v = call <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
1647  ret <8 x i64> %v
1648}
1649
1650declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32)
1651
1652define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; Masked VP bitreverse of <15 x i64>: the lowering first byte-swaps each
; element (shifts by 56/40/24/8 with byte masks 0xFF000000/0xFF00 etc.),
; then reverses bits within each byte using the 4/2/1 shift-and-mask
; ladder (0x0F0F..., 0x3333..., 0x5555... constants from lui 61681/209715/349525).
; On RV32, 64-bit mask constants cannot be held in scalar registers, so they
; are stored to stack slots and splatted with zero-stride vlse64.v loads;
; note those splat loads use `vsetivli zero, 16` — the non-power-of-2
; <15 x i64> type is widened to 16 lanes for the constant materialization.
; Holding all m8 temporaries live forces vector spills/reloads
; (vs8r.v/vl8r.v "Unknown-size Folded Spill/Reload") via a vlenb-scaled
; stack area. RV64 instead builds the same 64-bit masks in scalar registers
; (addiw + slli 32 + add) and needs only one 8*vlenb spill slot.
; CHECK lines below are autogenerated by update_llc_test_checks.py —
; regenerate rather than hand-edit.
; RV32-LABEL: vp_bitreverse_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmv8r.v v24, v8
; RV32-NEXT:    lui a2, 1044480
; RV32-NEXT:    lui a3, 61681
; RV32-NEXT:    lui a4, 209715
; RV32-NEXT:    lui a5, 349525
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    lui a6, 16
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw zero, 20(sp)
; RV32-NEXT:    addi a2, a3, -241
; RV32-NEXT:    sw a2, 40(sp)
; RV32-NEXT:    sw a2, 44(sp)
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    addi a3, a4, 819
; RV32-NEXT:    sw a3, 32(sp)
; RV32-NEXT:    sw a3, 36(sp)
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    addi a4, a5, 1365
; RV32-NEXT:    vsll.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a5, a6, -256
; RV32-NEXT:    sw a4, 24(sp)
; RV32-NEXT:    sw a4, 28(sp)
; RV32-NEXT:    vand.vx v8, v8, a5, v0.t
; RV32-NEXT:    vsll.vx v8, v8, a2, v0.t
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    lui a3, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vx v16, v24, a3, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 24, v0.t
; RV32-NEXT:    addi a4, sp, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vx v16, v24, a1, v0.t
; RV32-NEXT:    vsrl.vx v8, v24, a2, v0.t
; RV32-NEXT:    vand.vx v8, v8, a5, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v24, 24, v0.t
; RV32-NEXT:    vand.vx v16, v8, a3, v0.t
; RV32-NEXT:    vsrl.vi v8, v24, 8, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    addi a2, sp, 32
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v24, v24, 4, v0.t
; RV32-NEXT:    vor.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v24, v24, 2, v0.t
; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV32-NEXT:    vor.vv v8, v24, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64 keeps the 64-bit constants in scalar registers, so only one
; vlenb-sized spill slot is needed for the masked intermediates.
; RV64-LABEL: vp_bitreverse_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    li a5, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    slli a2, a2, 24
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsll.vi v16, v16, 24, v0.t
; RV64-NEXT:    vand.vx v24, v8, a2, v0.t
; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vsll.vx v24, v8, a3, v0.t
; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
; RV64-NEXT:    vsll.vx v16, v16, a5, v0.t
; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
; RV64-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vsrl.vx v24, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a5, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vor.vv v24, v16, v24, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    slli a3, a0, 32
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
; IR under test: per-lane bitreverse, governed by mask %m and explicit
; vector length %evl.
  %v = call <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}
1862
1863define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; Unmasked variant (mask = splat i1 true): same byte-swap + 4/2/1
; bit-ladder lowering as the masked version, but without the v0.t mask
; operands. The final *2 step is emitted as `vadd.vv v8, v8, v8` instead
; of `vsll.vi v8, v8, 1`. Register pressure is lower, so RV32 needs only
; a 16*vlenb spill area (vs 24*vlenb masked). CHECK lines are
; autogenerated by update_llc_test_checks.py — regenerate, don't hand-edit.
; RV32-LABEL: vp_bitreverse_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    lui a3, 209715
; RV32-NEXT:    lui a4, 349525
; RV32-NEXT:    li a5, 56
; RV32-NEXT:    lui a6, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vx v16, v8, a5
; RV32-NEXT:    vsrl.vx v24, v8, a5
; RV32-NEXT:    li a5, 40
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw zero, 20(sp)
; RV32-NEXT:    addi a1, a2, -241
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 4080
; RV32-NEXT:    addi a2, a3, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    addi a2, sp, 16
; RV32-NEXT:    addi a3, a4, 1365
; RV32-NEXT:    addi a4, a6, -256
; RV32-NEXT:    vsrl.vx v0, v8, a5
; RV32-NEXT:    sw a3, 24(sp)
; RV32-NEXT:    sw a3, 28(sp)
; RV32-NEXT:    vand.vx v0, v0, a4
; RV32-NEXT:    vor.vv v24, v0, v24
; RV32-NEXT:    addi a3, sp, 48
; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v0, v8, a4
; RV32-NEXT:    vsll.vx v0, v0, a5
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v0, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    vand.vv v24, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a1
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v0, v8, v24
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    addi a2, sp, 32
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    li a5, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsrl.vx v16, v8, a3
; RV64-NEXT:    vsrl.vx v0, v8, a5
; RV64-NEXT:    vand.vx v0, v0, a0
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    slli a2, a2, 24
; RV64-NEXT:    vand.vx v24, v24, a1
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vand.vx v0, v8, a1
; RV64-NEXT:    vsll.vi v0, v0, 24
; RV64-NEXT:    vand.vx v16, v8, a2
; RV64-NEXT:    vsll.vi v16, v16, 8
; RV64-NEXT:    vor.vv v0, v0, v16
; RV64-NEXT:    vsll.vx v16, v8, a3
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a5
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    slli a3, a0, 32
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vor.vv v8, v8, v0
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
; IR under test: all-true mask expressed as splat (i1 true), vector
; length still governed by %evl.
  %v = call <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}
2045
2046declare <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64>, <16 x i1>, i32)
2047
2048define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; Masked VP bitreverse of <16 x i64>. The generated code below is
; identical to the <15 x i64> masked case above — v15i64 is widened to
; 16 lanes for lowering, so both use the same byte-swap (shifts by
; 56/40/24/8 + byte masks) followed by the 4/2/1 shift-and-mask bit
; ladder. On RV32 the 64-bit masks are splatted from stack slots via
; zero-stride vlse64.v, and 24*vlenb of stack is reserved for vector
; spills; RV64 builds the masks in scalar registers with one 8*vlenb
; spill slot. CHECK lines are autogenerated by update_llc_test_checks.py.
; RV32-LABEL: vp_bitreverse_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vmv8r.v v24, v8
; RV32-NEXT:    lui a2, 1044480
; RV32-NEXT:    lui a3, 61681
; RV32-NEXT:    lui a4, 209715
; RV32-NEXT:    lui a5, 349525
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    lui a6, 16
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw zero, 20(sp)
; RV32-NEXT:    addi a2, a3, -241
; RV32-NEXT:    sw a2, 40(sp)
; RV32-NEXT:    sw a2, 44(sp)
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    addi a3, a4, 819
; RV32-NEXT:    sw a3, 32(sp)
; RV32-NEXT:    sw a3, 36(sp)
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    addi a4, a5, 1365
; RV32-NEXT:    vsll.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a5, a6, -256
; RV32-NEXT:    sw a4, 24(sp)
; RV32-NEXT:    sw a4, 28(sp)
; RV32-NEXT:    vand.vx v8, v8, a5, v0.t
; RV32-NEXT:    vsll.vx v8, v8, a2, v0.t
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a3), zero
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    lui a3, 4080
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vand.vx v16, v24, a3, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 24, v0.t
; RV32-NEXT:    addi a4, sp, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 4
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 48
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vx v16, v24, a1, v0.t
; RV32-NEXT:    vsrl.vx v8, v24, a2, v0.t
; RV32-NEXT:    vand.vx v8, v8, a5, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v8, v24, 24, v0.t
; RV32-NEXT:    vand.vx v16, v8, a3, v0.t
; RV32-NEXT:    vsrl.vi v8, v24, 8, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    addi a2, sp, 32
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a2), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v24, v24, 4, v0.t
; RV32-NEXT:    vor.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v24, v24, 2, v0.t
; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV32-NEXT:    vor.vv v8, v24, v8, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 24
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_bitreverse_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    lui a1, 4080
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    lui a4, 16
; RV64-NEXT:    li a5, 40
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    slli a2, a2, 24
; RV64-NEXT:    addiw a0, a4, -256
; RV64-NEXT:    vsll.vi v16, v16, 24, v0.t
; RV64-NEXT:    vand.vx v24, v8, a2, v0.t
; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vsll.vx v24, v8, a3, v0.t
; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
; RV64-NEXT:    vsll.vx v16, v16, a5, v0.t
; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
; RV64-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV64-NEXT:    vsrl.vx v24, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a5, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vor.vv v24, v16, v24, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 349525
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, 1365
; RV64-NEXT:    slli a3, a0, 32
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
; IR under test: per-lane bitreverse, governed by mask %m and explicit
; vector length %evl.
  %v = call <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}
2258
2259define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
2260; RV32-LABEL: vp_bitreverse_v16i64_unmasked:
2261; RV32:       # %bb.0:
2262; RV32-NEXT:    addi sp, sp, -48
2263; RV32-NEXT:    .cfi_def_cfa_offset 48
2264; RV32-NEXT:    csrr a1, vlenb
2265; RV32-NEXT:    slli a1, a1, 4
2266; RV32-NEXT:    sub sp, sp, a1
2267; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
2268; RV32-NEXT:    lui a1, 1044480
2269; RV32-NEXT:    lui a2, 61681
2270; RV32-NEXT:    lui a3, 209715
2271; RV32-NEXT:    lui a4, 349525
2272; RV32-NEXT:    li a5, 56
2273; RV32-NEXT:    lui a6, 16
2274; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2275; RV32-NEXT:    vsll.vx v16, v8, a5
2276; RV32-NEXT:    vsrl.vx v24, v8, a5
2277; RV32-NEXT:    li a5, 40
2278; RV32-NEXT:    sw a1, 16(sp)
2279; RV32-NEXT:    sw zero, 20(sp)
2280; RV32-NEXT:    addi a1, a2, -241
2281; RV32-NEXT:    sw a1, 40(sp)
2282; RV32-NEXT:    sw a1, 44(sp)
2283; RV32-NEXT:    lui a1, 4080
2284; RV32-NEXT:    addi a2, a3, 819
2285; RV32-NEXT:    sw a2, 32(sp)
2286; RV32-NEXT:    sw a2, 36(sp)
2287; RV32-NEXT:    addi a2, sp, 16
2288; RV32-NEXT:    addi a3, a4, 1365
2289; RV32-NEXT:    addi a4, a6, -256
2290; RV32-NEXT:    vsrl.vx v0, v8, a5
2291; RV32-NEXT:    sw a3, 24(sp)
2292; RV32-NEXT:    sw a3, 28(sp)
2293; RV32-NEXT:    vand.vx v0, v0, a4
2294; RV32-NEXT:    vor.vv v24, v0, v24
2295; RV32-NEXT:    addi a3, sp, 48
2296; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
2297; RV32-NEXT:    vand.vx v0, v8, a4
2298; RV32-NEXT:    vsll.vx v0, v0, a5
2299; RV32-NEXT:    vor.vv v16, v16, v0
2300; RV32-NEXT:    csrr a3, vlenb
2301; RV32-NEXT:    slli a3, a3, 3
2302; RV32-NEXT:    add a3, sp, a3
2303; RV32-NEXT:    addi a3, a3, 48
2304; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2305; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2306; RV32-NEXT:    vlse64.v v0, (a2), zero
2307; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2308; RV32-NEXT:    vsrl.vi v16, v8, 24
2309; RV32-NEXT:    vand.vx v16, v16, a1
2310; RV32-NEXT:    vsrl.vi v24, v8, 8
2311; RV32-NEXT:    vand.vv v24, v24, v0
2312; RV32-NEXT:    vor.vv v16, v24, v16
2313; RV32-NEXT:    vand.vv v24, v8, v0
2314; RV32-NEXT:    vand.vx v8, v8, a1
2315; RV32-NEXT:    vsll.vi v8, v8, 24
2316; RV32-NEXT:    vsll.vi v24, v24, 8
2317; RV32-NEXT:    vor.vv v0, v8, v24
2318; RV32-NEXT:    addi a1, sp, 48
2319; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
2320; RV32-NEXT:    vor.vv v8, v16, v8
2321; RV32-NEXT:    addi a1, sp, 40
2322; RV32-NEXT:    addi a2, sp, 32
2323; RV32-NEXT:    csrr a3, vlenb
2324; RV32-NEXT:    slli a3, a3, 3
2325; RV32-NEXT:    add a3, sp, a3
2326; RV32-NEXT:    addi a3, a3, 48
2327; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
2328; RV32-NEXT:    vor.vv v24, v16, v0
2329; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2330; RV32-NEXT:    vlse64.v v16, (a1), zero
2331; RV32-NEXT:    addi a1, sp, 24
2332; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2333; RV32-NEXT:    vor.vv v8, v24, v8
2334; RV32-NEXT:    vsrl.vi v24, v8, 4
2335; RV32-NEXT:    vand.vv v8, v8, v16
2336; RV32-NEXT:    vand.vv v16, v24, v16
2337; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2338; RV32-NEXT:    vlse64.v v24, (a2), zero
2339; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2340; RV32-NEXT:    vsll.vi v8, v8, 4
2341; RV32-NEXT:    vor.vv v8, v16, v8
2342; RV32-NEXT:    vsrl.vi v16, v8, 2
2343; RV32-NEXT:    vand.vv v8, v8, v24
2344; RV32-NEXT:    vand.vv v16, v16, v24
2345; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2346; RV32-NEXT:    vlse64.v v24, (a1), zero
2347; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2348; RV32-NEXT:    vsll.vi v8, v8, 2
2349; RV32-NEXT:    vor.vv v8, v16, v8
2350; RV32-NEXT:    vsrl.vi v16, v8, 1
2351; RV32-NEXT:    vand.vv v8, v8, v24
2352; RV32-NEXT:    vand.vv v16, v16, v24
2353; RV32-NEXT:    vadd.vv v8, v8, v8
2354; RV32-NEXT:    vor.vv v8, v16, v8
2355; RV32-NEXT:    csrr a0, vlenb
2356; RV32-NEXT:    slli a0, a0, 4
2357; RV32-NEXT:    add sp, sp, a0
2358; RV32-NEXT:    .cfi_def_cfa sp, 48
2359; RV32-NEXT:    addi sp, sp, 48
2360; RV32-NEXT:    .cfi_def_cfa_offset 0
2361; RV32-NEXT:    ret
2362;
2363; RV64-LABEL: vp_bitreverse_v16i64_unmasked:
2364; RV64:       # %bb.0:
2365; RV64-NEXT:    addi sp, sp, -16
2366; RV64-NEXT:    .cfi_def_cfa_offset 16
2367; RV64-NEXT:    csrr a1, vlenb
2368; RV64-NEXT:    slli a1, a1, 3
2369; RV64-NEXT:    sub sp, sp, a1
2370; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2371; RV64-NEXT:    lui a1, 4080
2372; RV64-NEXT:    li a2, 255
2373; RV64-NEXT:    li a3, 56
2374; RV64-NEXT:    lui a4, 16
2375; RV64-NEXT:    li a5, 40
2376; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2377; RV64-NEXT:    vsrl.vi v24, v8, 24
2378; RV64-NEXT:    addiw a0, a4, -256
2379; RV64-NEXT:    vsrl.vx v16, v8, a3
2380; RV64-NEXT:    vsrl.vx v0, v8, a5
2381; RV64-NEXT:    vand.vx v0, v0, a0
2382; RV64-NEXT:    vor.vv v16, v0, v16
2383; RV64-NEXT:    addi a4, sp, 16
2384; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2385; RV64-NEXT:    vsrl.vi v0, v8, 8
2386; RV64-NEXT:    slli a2, a2, 24
2387; RV64-NEXT:    vand.vx v24, v24, a1
2388; RV64-NEXT:    vand.vx v0, v0, a2
2389; RV64-NEXT:    vor.vv v24, v0, v24
2390; RV64-NEXT:    vand.vx v0, v8, a1
2391; RV64-NEXT:    vsll.vi v0, v0, 24
2392; RV64-NEXT:    vand.vx v16, v8, a2
2393; RV64-NEXT:    vsll.vi v16, v16, 8
2394; RV64-NEXT:    vor.vv v0, v0, v16
2395; RV64-NEXT:    vsll.vx v16, v8, a3
2396; RV64-NEXT:    vand.vx v8, v8, a0
2397; RV64-NEXT:    vsll.vx v8, v8, a5
2398; RV64-NEXT:    vor.vv v8, v16, v8
2399; RV64-NEXT:    lui a0, 61681
2400; RV64-NEXT:    lui a1, 209715
2401; RV64-NEXT:    lui a2, 349525
2402; RV64-NEXT:    addiw a0, a0, -241
2403; RV64-NEXT:    addiw a1, a1, 819
2404; RV64-NEXT:    addiw a2, a2, 1365
2405; RV64-NEXT:    slli a3, a0, 32
2406; RV64-NEXT:    slli a4, a1, 32
2407; RV64-NEXT:    add a0, a0, a3
2408; RV64-NEXT:    slli a3, a2, 32
2409; RV64-NEXT:    add a1, a1, a4
2410; RV64-NEXT:    add a2, a2, a3
2411; RV64-NEXT:    addi a3, sp, 16
2412; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
2413; RV64-NEXT:    vor.vv v16, v24, v16
2414; RV64-NEXT:    vor.vv v8, v8, v0
2415; RV64-NEXT:    vor.vv v8, v8, v16
2416; RV64-NEXT:    vsrl.vi v16, v8, 4
2417; RV64-NEXT:    vand.vx v8, v8, a0
2418; RV64-NEXT:    vand.vx v16, v16, a0
2419; RV64-NEXT:    vsll.vi v8, v8, 4
2420; RV64-NEXT:    vor.vv v8, v16, v8
2421; RV64-NEXT:    vsrl.vi v16, v8, 2
2422; RV64-NEXT:    vand.vx v8, v8, a1
2423; RV64-NEXT:    vand.vx v16, v16, a1
2424; RV64-NEXT:    vsll.vi v8, v8, 2
2425; RV64-NEXT:    vor.vv v8, v16, v8
2426; RV64-NEXT:    vsrl.vi v16, v8, 1
2427; RV64-NEXT:    vand.vx v8, v8, a2
2428; RV64-NEXT:    vand.vx v16, v16, a2
2429; RV64-NEXT:    vadd.vv v8, v8, v8
2430; RV64-NEXT:    vor.vv v8, v16, v8
2431; RV64-NEXT:    csrr a0, vlenb
2432; RV64-NEXT:    slli a0, a0, 3
2433; RV64-NEXT:    add sp, sp, a0
2434; RV64-NEXT:    .cfi_def_cfa sp, 16
2435; RV64-NEXT:    addi sp, sp, 16
2436; RV64-NEXT:    .cfi_def_cfa_offset 0
2437; RV64-NEXT:    ret
2438  %v = call <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
2439  ret <16 x i64> %v
2440}
2441
2442declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32)
2443
2444define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) {
; Masked vector-predicated bitreverse of a <128 x i16>. The assertion body
; below was generated by utils/update_llc_test_checks.py -- regenerate it with
; that script instead of editing lines by hand.
; The lowering splits the 128-element operand into two 64-element halves
; (the "li a2, 64" clamp on the EVL and the vslidedown.vi that moves the upper
; mask bits into v0), spilling v16 and the first-half result to the stack in
; between. Each half is bit-reversed with the classic swap ladder: a byte swap
; via shift-by-8, then 4/2/1-bit swaps using the 0x0F0F (lui 1 + addi -241),
; 0x3333 (lui 3 + addi 819) and 0x5555 (lui 5 + addi 1365) constants.
2445; CHECK-LABEL: vp_bitreverse_v128i16:
2446; CHECK:       # %bb.0:
2447; CHECK-NEXT:    addi sp, sp, -16
2448; CHECK-NEXT:    .cfi_def_cfa_offset 16
2449; CHECK-NEXT:    csrr a1, vlenb
2450; CHECK-NEXT:    slli a1, a1, 4
2451; CHECK-NEXT:    sub sp, sp, a1
2452; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2453; CHECK-NEXT:    csrr a1, vlenb
2454; CHECK-NEXT:    slli a1, a1, 3
2455; CHECK-NEXT:    add a1, sp, a1
2456; CHECK-NEXT:    addi a1, a1, 16
2457; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2458; CHECK-NEXT:    li a2, 64
2459; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
2460; CHECK-NEXT:    vslidedown.vi v24, v0, 8
2461; CHECK-NEXT:    mv a1, a0
2462; CHECK-NEXT:    bltu a0, a2, .LBB34_2
2463; CHECK-NEXT:  # %bb.1:
2464; CHECK-NEXT:    li a1, 64
2465; CHECK-NEXT:  .LBB34_2:
2466; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
2467; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
2468; CHECK-NEXT:    lui a1, 1
2469; CHECK-NEXT:    lui a2, 3
2470; CHECK-NEXT:    addi a3, a0, -64
2471; CHECK-NEXT:    sltu a0, a0, a3
2472; CHECK-NEXT:    addi a0, a0, -1
2473; CHECK-NEXT:    and a3, a0, a3
2474; CHECK-NEXT:    lui a0, 5
2475; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
2476; CHECK-NEXT:    addi a4, a1, -241
2477; CHECK-NEXT:    addi a1, a2, 819
2478; CHECK-NEXT:    addi a0, a0, 1365
2479; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
2480; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
2481; CHECK-NEXT:    vand.vx v16, v16, a4, v0.t
2482; CHECK-NEXT:    vand.vx v8, v8, a4, v0.t
2483; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
2484; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
2485; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
2486; CHECK-NEXT:    vand.vx v16, v16, a1, v0.t
2487; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
2488; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
2489; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
2490; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
2491; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
2492; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2493; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
2494; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
2495; CHECK-NEXT:    addi a2, sp, 16
2496; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
2497; CHECK-NEXT:    vmv1r.v v0, v24
2498; CHECK-NEXT:    csrr a2, vlenb
2499; CHECK-NEXT:    slli a2, a2, 3
2500; CHECK-NEXT:    add a2, sp, a2
2501; CHECK-NEXT:    addi a2, a2, 16
2502; CHECK-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
2503; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
2504; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
2505; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
2506; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
2507; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
2508; CHECK-NEXT:    vand.vx v16, v16, a4, v0.t
2509; CHECK-NEXT:    vand.vx v8, v8, a4, v0.t
2510; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
2511; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
2512; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
2513; CHECK-NEXT:    vand.vx v16, v16, a1, v0.t
2514; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
2515; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
2516; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
2517; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
2518; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
2519; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
2520; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
2521; CHECK-NEXT:    vor.vv v16, v16, v8, v0.t
2522; CHECK-NEXT:    addi a0, sp, 16
2523; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
2524; CHECK-NEXT:    csrr a0, vlenb
2525; CHECK-NEXT:    slli a0, a0, 4
2526; CHECK-NEXT:    add sp, sp, a0
2527; CHECK-NEXT:    .cfi_def_cfa sp, 16
2528; CHECK-NEXT:    addi sp, sp, 16
2529; CHECK-NEXT:    .cfi_def_cfa_offset 0
2530; CHECK-NEXT:    ret
; The IR under test: one masked vp.bitreverse intrinsic call.
2531  %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
2532  ret <128 x i16> %v
2533}
2534
2535define <128 x i16> @vp_bitreverse_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl) {
; Unmasked variant of the <128 x i16> vp.bitreverse test: the mask operand is
; an all-true splat, so no v0.t predication appears in the codegen. The
; assertion body below was generated by utils/update_llc_test_checks.py --
; regenerate it with that script rather than editing by hand.
; Unlike the masked version, the two 64-element halves need no spills: the
; schedule interleaves work on v8 (first half, VL in a1) and v16 (second half,
; VL in a0), toggling between the halves with repeated vsetvli instructions.
; The same swap-ladder constants are used: 0x0F0F (lui 1 + addi -241),
; 0x3333 (lui 3 + addi 819) and 0x5555 (lui 5 + addi 1365); the final
; shift-left-by-1 is emitted as vadd.vv x, x, x.
2536; CHECK-LABEL: vp_bitreverse_v128i16_unmasked:
2537; CHECK:       # %bb.0:
2538; CHECK-NEXT:    li a2, 64
2539; CHECK-NEXT:    mv a1, a0
2540; CHECK-NEXT:    bltu a0, a2, .LBB35_2
2541; CHECK-NEXT:  # %bb.1:
2542; CHECK-NEXT:    li a1, 64
2543; CHECK-NEXT:  .LBB35_2:
2544; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
2545; CHECK-NEXT:    vsrl.vi v24, v8, 8
2546; CHECK-NEXT:    vsll.vi v8, v8, 8
2547; CHECK-NEXT:    lui a2, 1
2548; CHECK-NEXT:    lui a3, 3
2549; CHECK-NEXT:    addi a4, a0, -64
2550; CHECK-NEXT:    sltu a0, a0, a4
2551; CHECK-NEXT:    addi a0, a0, -1
2552; CHECK-NEXT:    and a0, a0, a4
2553; CHECK-NEXT:    lui a4, 5
2554; CHECK-NEXT:    vor.vv v8, v8, v24
2555; CHECK-NEXT:    addi a2, a2, -241
2556; CHECK-NEXT:    addi a3, a3, 819
2557; CHECK-NEXT:    addi a4, a4, 1365
2558; CHECK-NEXT:    vsrl.vi v24, v8, 4
2559; CHECK-NEXT:    vand.vx v8, v8, a2
2560; CHECK-NEXT:    vand.vx v24, v24, a2
2561; CHECK-NEXT:    vsll.vi v8, v8, 4
2562; CHECK-NEXT:    vor.vv v8, v24, v8
2563; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
2564; CHECK-NEXT:    vsrl.vi v24, v16, 8
2565; CHECK-NEXT:    vsll.vi v16, v16, 8
2566; CHECK-NEXT:    vor.vv v16, v16, v24
2567; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
2568; CHECK-NEXT:    vsrl.vi v24, v8, 2
2569; CHECK-NEXT:    vand.vx v8, v8, a3
2570; CHECK-NEXT:    vand.vx v24, v24, a3
2571; CHECK-NEXT:    vsll.vi v8, v8, 2
2572; CHECK-NEXT:    vor.vv v8, v24, v8
2573; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
2574; CHECK-NEXT:    vsrl.vi v24, v16, 4
2575; CHECK-NEXT:    vand.vx v16, v16, a2
2576; CHECK-NEXT:    vand.vx v24, v24, a2
2577; CHECK-NEXT:    vsll.vi v16, v16, 4
2578; CHECK-NEXT:    vor.vv v16, v24, v16
2579; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
2580; CHECK-NEXT:    vsrl.vi v24, v8, 1
2581; CHECK-NEXT:    vand.vx v8, v8, a4
2582; CHECK-NEXT:    vand.vx v24, v24, a4
2583; CHECK-NEXT:    vadd.vv v8, v8, v8
2584; CHECK-NEXT:    vor.vv v8, v24, v8
2585; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
2586; CHECK-NEXT:    vsrl.vi v24, v16, 2
2587; CHECK-NEXT:    vand.vx v16, v16, a3
2588; CHECK-NEXT:    vand.vx v24, v24, a3
2589; CHECK-NEXT:    vsll.vi v16, v16, 2
2590; CHECK-NEXT:    vor.vv v16, v24, v16
2591; CHECK-NEXT:    vsrl.vi v24, v16, 1
2592; CHECK-NEXT:    vand.vx v16, v16, a4
2593; CHECK-NEXT:    vand.vx v24, v24, a4
2594; CHECK-NEXT:    vadd.vv v16, v16, v16
2595; CHECK-NEXT:    vor.vv v16, v24, v16
2596; CHECK-NEXT:    ret
; The IR under test: vp.bitreverse with a constant all-true mask.
2597  %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> splat (i1 true), i32 %evl)
2598  ret <128 x i16> %v
2599}
2600