; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB

declare <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vp_bitreverse_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

define <vscale x 1 x i8> @vp_bitreverse_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.bitreverse.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vp_bitreverse_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i8> @vp_bitreverse_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.bitreverse.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i8> %v
}

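; Without Zvbb, vp.bitreverse is expanded: elements wider than i8 are first
; byte-swapped with shift/or sequences, then the bits within each byte are
; reversed with nibble, 2-bit, and 1-bit swap-and-mask steps (0x0F/0x33/0x55
; style masks widened to the element size). With +zvbb the whole operation
; lowers to a single vbrev.v instruction.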
declare <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vp_bitreverse_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @vp_bitreverse_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.bitreverse.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vp_bitreverse_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @vp_bitreverse_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.bitreverse.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i8> @vp_bitreverse_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vand.vi v10, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v10, v10, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i8> %v
}

define <vscale x 16 x i8> @vp_bitreverse_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vand.vi v10, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v10, v10, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i8> @llvm.vp.bitreverse.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i8> @vp_bitreverse_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vand.vi v12, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v12, v12, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

define <vscale x 32 x i8> @vp_bitreverse_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vand.vi v12, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i8> @llvm.vp.bitreverse.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32)

define <vscale x 64 x i8> @vp_bitreverse_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vand.vi v16, v8, 15, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v16, v16, 4, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v8, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
  ret <vscale x 64 x i8> %v
}

define <vscale x 64 x i8> @vp_bitreverse_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vand.vi v16, v8, 15
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsll.vi v16, v16, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i8_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 64 x i8> @llvm.vp.bitreverse.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 64 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vp_bitreverse_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

define <vscale x 1 x i16> @vp_bitreverse_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.bitreverse.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vp_bitreverse_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i16> @vp_bitreverse_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.bitreverse.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i16> %v
}

declare <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vp_bitreverse_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vp_bitreverse_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.bitreverse.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vp_bitreverse_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vp_bitreverse_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.bitreverse.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i16> @vp_bitreverse_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i16> %v
}

define <vscale x 16 x i16> @vp_bitreverse_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i16> @llvm.vp.bitreverse.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i16> %v
}

declare <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i16> @vp_bitreverse_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v8, v16, 4, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v16, v16, 4, v0.t
; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v8, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i16> %v
}

define <vscale x 32 x i16> @vp_bitreverse_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv32i16_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.vp.bitreverse.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x i16> %v
}

declare <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vp_bitreverse_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

define <vscale x 1 x i32> @vp_bitreverse_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.bitreverse.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vp_bitreverse_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v10, v10, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i32> @vp_bitreverse_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.bitreverse.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i32> %v
}

declare <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vp_bitreverse_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v10, v10, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v12, v12, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v10, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vp_bitreverse_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vsll.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.bitreverse.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vp_bitreverse_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v12, v12, v16, v0.t
; CHECK-NEXT:    vand.vx v16, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v12, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vp_bitreverse_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vsll.vi v16, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.bitreverse.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)

define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vsrl.vi v24, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v16, v16, v24, v0.t
; CHECK-NEXT:    vand.vx v24, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v24, v24, 8, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 24, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v24, v0.t
; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v8, v16, 4, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v16, v16, 4, v0.t
; CHECK-NEXT:    vor.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v8, v16, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v16, v16, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x i32> %v
}

define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vsll.vi v24, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v24, v8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.vp.bitreverse.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i32> %v
}

1433declare <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1434
1435define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1436; RV32-LABEL: vp_bitreverse_nxv1i64:
1437; RV32:       # %bb.0:
1438; RV32-NEXT:    addi sp, sp, -16
1439; RV32-NEXT:    .cfi_def_cfa_offset 16
1440; RV32-NEXT:    lui a4, 1044480
1441; RV32-NEXT:    li a3, 56
1442; RV32-NEXT:    lui a5, 16
1443; RV32-NEXT:    li a2, 40
1444; RV32-NEXT:    lui a1, 4080
1445; RV32-NEXT:    addi a6, sp, 8
1446; RV32-NEXT:    sw a4, 8(sp)
1447; RV32-NEXT:    sw zero, 12(sp)
1448; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1449; RV32-NEXT:    vlse64.v v9, (a6), zero
1450; RV32-NEXT:    lui a4, 61681
1451; RV32-NEXT:    vsll.vx v10, v8, a3, v0.t
1452; RV32-NEXT:    addi a5, a5, -256
1453; RV32-NEXT:    vand.vx v11, v8, a5, v0.t
1454; RV32-NEXT:    vsll.vx v11, v11, a2, v0.t
1455; RV32-NEXT:    vor.vv v10, v10, v11, v0.t
1456; RV32-NEXT:    vand.vx v11, v8, a1, v0.t
1457; RV32-NEXT:    vsll.vi v11, v11, 24, v0.t
1458; RV32-NEXT:    vand.vv v12, v8, v9, v0.t
1459; RV32-NEXT:    vsll.vi v12, v12, 8, v0.t
1460; RV32-NEXT:    vor.vv v11, v11, v12, v0.t
1461; RV32-NEXT:    vor.vv v10, v10, v11, v0.t
1462; RV32-NEXT:    vsrl.vx v11, v8, a3, v0.t
1463; RV32-NEXT:    lui a3, 209715
1464; RV32-NEXT:    vsrl.vx v12, v8, a2, v0.t
1465; RV32-NEXT:    lui a2, 349525
1466; RV32-NEXT:    addi a4, a4, -241
1467; RV32-NEXT:    addi a3, a3, 819
1468; RV32-NEXT:    addi a2, a2, 1365
1469; RV32-NEXT:    vand.vx v12, v12, a5, v0.t
1470; RV32-NEXT:    vor.vv v11, v12, v11, v0.t
1471; RV32-NEXT:    vsrl.vi v12, v8, 24, v0.t
1472; RV32-NEXT:    vand.vx v12, v12, a1, v0.t
1473; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
1474; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
1475; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
1476; RV32-NEXT:    vmv.v.x v9, a4
1477; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1478; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
1479; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
1480; RV32-NEXT:    vmv.v.x v12, a3
1481; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1482; RV32-NEXT:    vor.vv v8, v8, v11, v0.t
1483; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
1484; RV32-NEXT:    vmv.v.x v11, a2
1485; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1486; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
1487; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
1488; RV32-NEXT:    vand.vv v10, v10, v9, v0.t
1489; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
1490; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
1491; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
1492; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
1493; RV32-NEXT:    vand.vv v9, v9, v12, v0.t
1494; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
1495; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
1496; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
1497; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
1498; RV32-NEXT:    vand.vv v9, v9, v11, v0.t
1499; RV32-NEXT:    vand.vv v8, v8, v11, v0.t
1500; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
1501; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
1502; RV32-NEXT:    addi sp, sp, 16
1503; RV32-NEXT:    .cfi_def_cfa_offset 0
1504; RV32-NEXT:    ret
1505;
1506; RV64-LABEL: vp_bitreverse_nxv1i64:
1507; RV64:       # %bb.0:
1508; RV64-NEXT:    lui a1, 4080
1509; RV64-NEXT:    li a3, 255
1510; RV64-NEXT:    li a2, 56
1511; RV64-NEXT:    lui a4, 16
1512; RV64-NEXT:    lui a5, 61681
1513; RV64-NEXT:    lui a6, 209715
1514; RV64-NEXT:    lui a7, 349525
1515; RV64-NEXT:    addiw a5, a5, -241
1516; RV64-NEXT:    addiw a6, a6, 819
1517; RV64-NEXT:    addiw a7, a7, 1365
1518; RV64-NEXT:    slli t0, a5, 32
1519; RV64-NEXT:    add t0, a5, t0
1520; RV64-NEXT:    slli a5, a6, 32
1521; RV64-NEXT:    add a6, a6, a5
1522; RV64-NEXT:    slli a5, a7, 32
1523; RV64-NEXT:    add a5, a7, a5
1524; RV64-NEXT:    li a7, 40
1525; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1526; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
1527; RV64-NEXT:    slli a3, a3, 24
1528; RV64-NEXT:    addiw a0, a4, -256
1529; RV64-NEXT:    vsll.vi v9, v9, 24, v0.t
1530; RV64-NEXT:    vand.vx v10, v8, a3, v0.t
1531; RV64-NEXT:    vsll.vi v10, v10, 8, v0.t
1532; RV64-NEXT:    vor.vv v9, v9, v10, v0.t
1533; RV64-NEXT:    vsll.vx v10, v8, a2, v0.t
1534; RV64-NEXT:    vand.vx v11, v8, a0, v0.t
1535; RV64-NEXT:    vsll.vx v11, v11, a7, v0.t
1536; RV64-NEXT:    vor.vv v10, v10, v11, v0.t
1537; RV64-NEXT:    vor.vv v9, v10, v9, v0.t
1538; RV64-NEXT:    vsrl.vx v10, v8, a2, v0.t
1539; RV64-NEXT:    vsrl.vx v11, v8, a7, v0.t
1540; RV64-NEXT:    vand.vx v11, v11, a0, v0.t
1541; RV64-NEXT:    vor.vv v10, v11, v10, v0.t
1542; RV64-NEXT:    vsrl.vi v11, v8, 24, v0.t
1543; RV64-NEXT:    vand.vx v11, v11, a1, v0.t
1544; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
1545; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
1546; RV64-NEXT:    vor.vv v8, v8, v11, v0.t
1547; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
1548; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
1549; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
1550; RV64-NEXT:    vand.vx v9, v9, t0, v0.t
1551; RV64-NEXT:    vand.vx v8, v8, t0, v0.t
1552; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
1553; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
1554; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
1555; RV64-NEXT:    vand.vx v9, v9, a6, v0.t
1556; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
1557; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
1558; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
1559; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
1560; RV64-NEXT:    vand.vx v9, v9, a5, v0.t
1561; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
1562; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
1563; RV64-NEXT:    vor.vv v8, v9, v8, v0.t
1564; RV64-NEXT:    ret
1565;
1566; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64:
1567; CHECK-ZVBB:       # %bb.0:
1568; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1569; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
1570; CHECK-ZVBB-NEXT:    ret
1571  %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
1572  ret <vscale x 1 x i64> %v
1573}
1574
1575define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
1576; RV32-LABEL: vp_bitreverse_nxv1i64_unmasked:
1577; RV32:       # %bb.0:
1578; RV32-NEXT:    addi sp, sp, -16
1579; RV32-NEXT:    .cfi_def_cfa_offset 16
1580; RV32-NEXT:    lui a1, 1044480
1581; RV32-NEXT:    li a2, 56
1582; RV32-NEXT:    lui a3, 16
1583; RV32-NEXT:    li a4, 40
1584; RV32-NEXT:    lui a5, 4080
1585; RV32-NEXT:    addi a6, sp, 8
1586; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1587; RV32-NEXT:    vsrl.vi v9, v8, 24
1588; RV32-NEXT:    sw a1, 8(sp)
1589; RV32-NEXT:    sw zero, 12(sp)
1590; RV32-NEXT:    vsll.vx v10, v8, a2
1591; RV32-NEXT:    addi a1, a3, -256
1592; RV32-NEXT:    vsrl.vx v11, v8, a2
1593; RV32-NEXT:    vsrl.vx v12, v8, a4
1594; RV32-NEXT:    vand.vx v13, v8, a1
1595; RV32-NEXT:    vand.vx v12, v12, a1
1596; RV32-NEXT:    vor.vv v11, v12, v11
1597; RV32-NEXT:    vlse64.v v12, (a6), zero
1598; RV32-NEXT:    vsll.vx v13, v13, a4
1599; RV32-NEXT:    vor.vv v10, v10, v13
1600; RV32-NEXT:    vsrl.vi v13, v8, 8
1601; RV32-NEXT:    vand.vx v9, v9, a5
1602; RV32-NEXT:    vand.vv v13, v13, v12
1603; RV32-NEXT:    vor.vv v9, v13, v9
1604; RV32-NEXT:    lui a1, 61681
1605; RV32-NEXT:    lui a2, 209715
1606; RV32-NEXT:    lui a3, 349525
1607; RV32-NEXT:    vand.vv v12, v8, v12
1608; RV32-NEXT:    vand.vx v8, v8, a5
1609; RV32-NEXT:    addi a1, a1, -241
1610; RV32-NEXT:    addi a2, a2, 819
1611; RV32-NEXT:    addi a3, a3, 1365
1612; RV32-NEXT:    vsll.vi v8, v8, 24
1613; RV32-NEXT:    vsll.vi v12, v12, 8
1614; RV32-NEXT:    vor.vv v8, v8, v12
1615; RV32-NEXT:    vsetvli a4, zero, e32, m1, ta, ma
1616; RV32-NEXT:    vmv.v.x v12, a1
1617; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1618; RV32-NEXT:    vor.vv v9, v9, v11
1619; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
1620; RV32-NEXT:    vmv.v.x v11, a2
1621; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1622; RV32-NEXT:    vor.vv v8, v10, v8
1623; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
1624; RV32-NEXT:    vmv.v.x v10, a3
1625; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1626; RV32-NEXT:    vor.vv v8, v8, v9
1627; RV32-NEXT:    vsrl.vi v9, v8, 4
1628; RV32-NEXT:    vand.vv v8, v8, v12
1629; RV32-NEXT:    vand.vv v9, v9, v12
1630; RV32-NEXT:    vsll.vi v8, v8, 4
1631; RV32-NEXT:    vor.vv v8, v9, v8
1632; RV32-NEXT:    vsrl.vi v9, v8, 2
1633; RV32-NEXT:    vand.vv v8, v8, v11
1634; RV32-NEXT:    vand.vv v9, v9, v11
1635; RV32-NEXT:    vsll.vi v8, v8, 2
1636; RV32-NEXT:    vor.vv v8, v9, v8
1637; RV32-NEXT:    vsrl.vi v9, v8, 1
1638; RV32-NEXT:    vand.vv v8, v8, v10
1639; RV32-NEXT:    vand.vv v9, v9, v10
1640; RV32-NEXT:    vadd.vv v8, v8, v8
1641; RV32-NEXT:    vor.vv v8, v9, v8
1642; RV32-NEXT:    addi sp, sp, 16
1643; RV32-NEXT:    .cfi_def_cfa_offset 0
1644; RV32-NEXT:    ret
1645;
1646; RV64-LABEL: vp_bitreverse_nxv1i64_unmasked:
1647; RV64:       # %bb.0:
1648; RV64-NEXT:    lui a1, 4080
1649; RV64-NEXT:    li a2, 255
1650; RV64-NEXT:    li a3, 56
1651; RV64-NEXT:    lui a4, 16
1652; RV64-NEXT:    li a5, 40
1653; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1654; RV64-NEXT:    vsrl.vi v9, v8, 24
1655; RV64-NEXT:    vsrl.vi v10, v8, 8
1656; RV64-NEXT:    addiw a0, a4, -256
1657; RV64-NEXT:    vsrl.vx v11, v8, a3
1658; RV64-NEXT:    vsrl.vx v12, v8, a5
1659; RV64-NEXT:    vand.vx v12, v12, a0
1660; RV64-NEXT:    vor.vv v11, v12, v11
1661; RV64-NEXT:    vand.vx v12, v8, a1
1662; RV64-NEXT:    slli a2, a2, 24
1663; RV64-NEXT:    vand.vx v9, v9, a1
1664; RV64-NEXT:    vsll.vi v12, v12, 24
1665; RV64-NEXT:    vand.vx v10, v10, a2
1666; RV64-NEXT:    vor.vv v9, v10, v9
1667; RV64-NEXT:    vand.vx v10, v8, a2
1668; RV64-NEXT:    vsll.vi v10, v10, 8
1669; RV64-NEXT:    vor.vv v10, v12, v10
1670; RV64-NEXT:    vsll.vx v12, v8, a3
1671; RV64-NEXT:    vand.vx v8, v8, a0
1672; RV64-NEXT:    vsll.vx v8, v8, a5
1673; RV64-NEXT:    vor.vv v8, v12, v8
1674; RV64-NEXT:    lui a0, 61681
1675; RV64-NEXT:    lui a1, 209715
1676; RV64-NEXT:    lui a2, 349525
1677; RV64-NEXT:    addiw a0, a0, -241
1678; RV64-NEXT:    addiw a1, a1, 819
1679; RV64-NEXT:    addiw a2, a2, 1365
1680; RV64-NEXT:    slli a3, a0, 32
1681; RV64-NEXT:    slli a4, a1, 32
1682; RV64-NEXT:    add a0, a0, a3
1683; RV64-NEXT:    slli a3, a2, 32
1684; RV64-NEXT:    add a1, a1, a4
1685; RV64-NEXT:    add a2, a2, a3
1686; RV64-NEXT:    vor.vv v9, v9, v11
1687; RV64-NEXT:    vor.vv v8, v8, v10
1688; RV64-NEXT:    vor.vv v8, v8, v9
1689; RV64-NEXT:    vsrl.vi v9, v8, 4
1690; RV64-NEXT:    vand.vx v8, v8, a0
1691; RV64-NEXT:    vand.vx v9, v9, a0
1692; RV64-NEXT:    vsll.vi v8, v8, 4
1693; RV64-NEXT:    vor.vv v8, v9, v8
1694; RV64-NEXT:    vsrl.vi v9, v8, 2
1695; RV64-NEXT:    vand.vx v8, v8, a1
1696; RV64-NEXT:    vand.vx v9, v9, a1
1697; RV64-NEXT:    vsll.vi v8, v8, 2
1698; RV64-NEXT:    vor.vv v8, v9, v8
1699; RV64-NEXT:    vsrl.vi v9, v8, 1
1700; RV64-NEXT:    vand.vx v8, v8, a2
1701; RV64-NEXT:    vand.vx v9, v9, a2
1702; RV64-NEXT:    vadd.vv v8, v8, v8
1703; RV64-NEXT:    vor.vv v8, v9, v8
1704; RV64-NEXT:    ret
1705;
1706; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i64_unmasked:
1707; CHECK-ZVBB:       # %bb.0:
1708; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1709; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
1710; CHECK-ZVBB-NEXT:    ret
1711  %v = call <vscale x 1 x i64> @llvm.vp.bitreverse.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1712  ret <vscale x 1 x i64> %v
1713}
1714
1715declare <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1716
1717define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1718; RV32-LABEL: vp_bitreverse_nxv2i64:
1719; RV32:       # %bb.0:
1720; RV32-NEXT:    addi sp, sp, -16
1721; RV32-NEXT:    .cfi_def_cfa_offset 16
1722; RV32-NEXT:    lui a4, 1044480
1723; RV32-NEXT:    li a3, 56
1724; RV32-NEXT:    lui a5, 16
1725; RV32-NEXT:    li a2, 40
1726; RV32-NEXT:    lui a1, 4080
1727; RV32-NEXT:    addi a6, sp, 8
1728; RV32-NEXT:    sw a4, 8(sp)
1729; RV32-NEXT:    sw zero, 12(sp)
1730; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1731; RV32-NEXT:    vlse64.v v10, (a6), zero
1732; RV32-NEXT:    lui a4, 61681
1733; RV32-NEXT:    vsll.vx v12, v8, a3, v0.t
1734; RV32-NEXT:    addi a5, a5, -256
1735; RV32-NEXT:    vand.vx v14, v8, a5, v0.t
1736; RV32-NEXT:    vsll.vx v14, v14, a2, v0.t
1737; RV32-NEXT:    vor.vv v12, v12, v14, v0.t
1738; RV32-NEXT:    vand.vx v14, v8, a1, v0.t
1739; RV32-NEXT:    vsll.vi v14, v14, 24, v0.t
1740; RV32-NEXT:    vand.vv v16, v8, v10, v0.t
1741; RV32-NEXT:    vsll.vi v16, v16, 8, v0.t
1742; RV32-NEXT:    vor.vv v14, v14, v16, v0.t
1743; RV32-NEXT:    vor.vv v12, v12, v14, v0.t
1744; RV32-NEXT:    vsrl.vx v14, v8, a3, v0.t
1745; RV32-NEXT:    lui a3, 209715
1746; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
1747; RV32-NEXT:    lui a2, 349525
1748; RV32-NEXT:    addi a4, a4, -241
1749; RV32-NEXT:    addi a3, a3, 819
1750; RV32-NEXT:    addi a2, a2, 1365
1751; RV32-NEXT:    vand.vx v16, v16, a5, v0.t
1752; RV32-NEXT:    vor.vv v14, v16, v14, v0.t
1753; RV32-NEXT:    vsrl.vi v16, v8, 24, v0.t
1754; RV32-NEXT:    vand.vx v16, v16, a1, v0.t
1755; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
1756; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
1757; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
1758; RV32-NEXT:    vmv.v.x v10, a4
1759; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1760; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
1761; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
1762; RV32-NEXT:    vmv.v.x v16, a3
1763; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1764; RV32-NEXT:    vor.vv v8, v8, v14, v0.t
1765; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
1766; RV32-NEXT:    vmv.v.x v14, a2
1767; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1768; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
1769; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
1770; RV32-NEXT:    vand.vv v12, v12, v10, v0.t
1771; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
1772; RV32-NEXT:    vsll.vi v8, v8, 4, v0.t
1773; RV32-NEXT:    vor.vv v8, v12, v8, v0.t
1774; RV32-NEXT:    vsrl.vi v10, v8, 2, v0.t
1775; RV32-NEXT:    vand.vv v10, v10, v16, v0.t
1776; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
1777; RV32-NEXT:    vsll.vi v8, v8, 2, v0.t
1778; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
1779; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
1780; RV32-NEXT:    vand.vv v10, v10, v14, v0.t
1781; RV32-NEXT:    vand.vv v8, v8, v14, v0.t
1782; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
1783; RV32-NEXT:    vor.vv v8, v10, v8, v0.t
1784; RV32-NEXT:    addi sp, sp, 16
1785; RV32-NEXT:    .cfi_def_cfa_offset 0
1786; RV32-NEXT:    ret
1787;
1788; RV64-LABEL: vp_bitreverse_nxv2i64:
1789; RV64:       # %bb.0:
1790; RV64-NEXT:    lui a1, 4080
1791; RV64-NEXT:    li a3, 255
1792; RV64-NEXT:    li a2, 56
1793; RV64-NEXT:    lui a4, 16
1794; RV64-NEXT:    lui a5, 61681
1795; RV64-NEXT:    lui a6, 209715
1796; RV64-NEXT:    lui a7, 349525
1797; RV64-NEXT:    addiw a5, a5, -241
1798; RV64-NEXT:    addiw a6, a6, 819
1799; RV64-NEXT:    addiw a7, a7, 1365
1800; RV64-NEXT:    slli t0, a5, 32
1801; RV64-NEXT:    add t0, a5, t0
1802; RV64-NEXT:    slli a5, a6, 32
1803; RV64-NEXT:    add a6, a6, a5
1804; RV64-NEXT:    slli a5, a7, 32
1805; RV64-NEXT:    add a5, a7, a5
1806; RV64-NEXT:    li a7, 40
1807; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1808; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
1809; RV64-NEXT:    slli a3, a3, 24
1810; RV64-NEXT:    addiw a0, a4, -256
1811; RV64-NEXT:    vsll.vi v10, v10, 24, v0.t
1812; RV64-NEXT:    vand.vx v12, v8, a3, v0.t
1813; RV64-NEXT:    vsll.vi v12, v12, 8, v0.t
1814; RV64-NEXT:    vor.vv v10, v10, v12, v0.t
1815; RV64-NEXT:    vsll.vx v12, v8, a2, v0.t
1816; RV64-NEXT:    vand.vx v14, v8, a0, v0.t
1817; RV64-NEXT:    vsll.vx v14, v14, a7, v0.t
1818; RV64-NEXT:    vor.vv v12, v12, v14, v0.t
1819; RV64-NEXT:    vor.vv v10, v12, v10, v0.t
1820; RV64-NEXT:    vsrl.vx v12, v8, a2, v0.t
1821; RV64-NEXT:    vsrl.vx v14, v8, a7, v0.t
1822; RV64-NEXT:    vand.vx v14, v14, a0, v0.t
1823; RV64-NEXT:    vor.vv v12, v14, v12, v0.t
1824; RV64-NEXT:    vsrl.vi v14, v8, 24, v0.t
1825; RV64-NEXT:    vand.vx v14, v14, a1, v0.t
1826; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
1827; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
1828; RV64-NEXT:    vor.vv v8, v8, v14, v0.t
1829; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
1830; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
1831; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
1832; RV64-NEXT:    vand.vx v10, v10, t0, v0.t
1833; RV64-NEXT:    vand.vx v8, v8, t0, v0.t
1834; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
1835; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
1836; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
1837; RV64-NEXT:    vand.vx v10, v10, a6, v0.t
1838; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
1839; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
1840; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
1841; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
1842; RV64-NEXT:    vand.vx v10, v10, a5, v0.t
1843; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
1844; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
1845; RV64-NEXT:    vor.vv v8, v10, v8, v0.t
1846; RV64-NEXT:    ret
1847;
1848; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64:
1849; CHECK-ZVBB:       # %bb.0:
1850; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1851; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
1852; CHECK-ZVBB-NEXT:    ret
1853  %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
1854  ret <vscale x 2 x i64> %v
1855}
1856
1857define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
1858; RV32-LABEL: vp_bitreverse_nxv2i64_unmasked:
1859; RV32:       # %bb.0:
1860; RV32-NEXT:    addi sp, sp, -16
1861; RV32-NEXT:    .cfi_def_cfa_offset 16
1862; RV32-NEXT:    lui a1, 1044480
1863; RV32-NEXT:    li a2, 56
1864; RV32-NEXT:    lui a3, 16
1865; RV32-NEXT:    li a4, 40
1866; RV32-NEXT:    lui a5, 4080
1867; RV32-NEXT:    addi a6, sp, 8
1868; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1869; RV32-NEXT:    vsrl.vi v14, v8, 24
1870; RV32-NEXT:    sw a1, 8(sp)
1871; RV32-NEXT:    sw zero, 12(sp)
1872; RV32-NEXT:    vsll.vx v12, v8, a2
1873; RV32-NEXT:    addi a1, a3, -256
1874; RV32-NEXT:    vsrl.vx v10, v8, a2
1875; RV32-NEXT:    vsrl.vx v16, v8, a4
1876; RV32-NEXT:    vand.vx v18, v8, a1
1877; RV32-NEXT:    vand.vx v16, v16, a1
1878; RV32-NEXT:    vor.vv v10, v16, v10
1879; RV32-NEXT:    vlse64.v v16, (a6), zero
1880; RV32-NEXT:    vsll.vx v18, v18, a4
1881; RV32-NEXT:    vor.vv v12, v12, v18
1882; RV32-NEXT:    vsrl.vi v18, v8, 8
1883; RV32-NEXT:    vand.vx v14, v14, a5
1884; RV32-NEXT:    vand.vv v18, v18, v16
1885; RV32-NEXT:    vor.vv v14, v18, v14
1886; RV32-NEXT:    lui a1, 61681
1887; RV32-NEXT:    lui a2, 209715
1888; RV32-NEXT:    lui a3, 349525
1889; RV32-NEXT:    vand.vv v16, v8, v16
1890; RV32-NEXT:    vand.vx v8, v8, a5
1891; RV32-NEXT:    addi a1, a1, -241
1892; RV32-NEXT:    addi a2, a2, 819
1893; RV32-NEXT:    addi a3, a3, 1365
1894; RV32-NEXT:    vsll.vi v8, v8, 24
1895; RV32-NEXT:    vsll.vi v16, v16, 8
1896; RV32-NEXT:    vor.vv v8, v8, v16
1897; RV32-NEXT:    vsetvli a4, zero, e32, m2, ta, ma
1898; RV32-NEXT:    vmv.v.x v16, a1
1899; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1900; RV32-NEXT:    vor.vv v10, v14, v10
1901; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
1902; RV32-NEXT:    vmv.v.x v14, a2
1903; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1904; RV32-NEXT:    vor.vv v8, v12, v8
1905; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
1906; RV32-NEXT:    vmv.v.x v12, a3
1907; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1908; RV32-NEXT:    vor.vv v8, v8, v10
1909; RV32-NEXT:    vsrl.vi v10, v8, 4
1910; RV32-NEXT:    vand.vv v8, v8, v16
1911; RV32-NEXT:    vand.vv v10, v10, v16
1912; RV32-NEXT:    vsll.vi v8, v8, 4
1913; RV32-NEXT:    vor.vv v8, v10, v8
1914; RV32-NEXT:    vsrl.vi v10, v8, 2
1915; RV32-NEXT:    vand.vv v8, v8, v14
1916; RV32-NEXT:    vand.vv v10, v10, v14
1917; RV32-NEXT:    vsll.vi v8, v8, 2
1918; RV32-NEXT:    vor.vv v8, v10, v8
1919; RV32-NEXT:    vsrl.vi v10, v8, 1
1920; RV32-NEXT:    vand.vv v8, v8, v12
1921; RV32-NEXT:    vand.vv v10, v10, v12
1922; RV32-NEXT:    vadd.vv v8, v8, v8
1923; RV32-NEXT:    vor.vv v8, v10, v8
1924; RV32-NEXT:    addi sp, sp, 16
1925; RV32-NEXT:    .cfi_def_cfa_offset 0
1926; RV32-NEXT:    ret
1927;
1928; RV64-LABEL: vp_bitreverse_nxv2i64_unmasked:
1929; RV64:       # %bb.0:
1930; RV64-NEXT:    lui a1, 4080
1931; RV64-NEXT:    li a2, 255
1932; RV64-NEXT:    li a3, 56
1933; RV64-NEXT:    lui a4, 16
1934; RV64-NEXT:    li a5, 40
1935; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1936; RV64-NEXT:    vsrl.vi v12, v8, 24
1937; RV64-NEXT:    vsrl.vi v14, v8, 8
1938; RV64-NEXT:    addiw a0, a4, -256
1939; RV64-NEXT:    vsrl.vx v10, v8, a3
1940; RV64-NEXT:    vsrl.vx v16, v8, a5
1941; RV64-NEXT:    vand.vx v16, v16, a0
1942; RV64-NEXT:    vor.vv v10, v16, v10
1943; RV64-NEXT:    vand.vx v16, v8, a1
1944; RV64-NEXT:    slli a2, a2, 24
1945; RV64-NEXT:    vand.vx v12, v12, a1
1946; RV64-NEXT:    vsll.vi v16, v16, 24
1947; RV64-NEXT:    vand.vx v14, v14, a2
1948; RV64-NEXT:    vor.vv v12, v14, v12
1949; RV64-NEXT:    vand.vx v14, v8, a2
1950; RV64-NEXT:    vsll.vi v14, v14, 8
1951; RV64-NEXT:    vor.vv v14, v16, v14
1952; RV64-NEXT:    vsll.vx v16, v8, a3
1953; RV64-NEXT:    vand.vx v8, v8, a0
1954; RV64-NEXT:    vsll.vx v8, v8, a5
1955; RV64-NEXT:    vor.vv v8, v16, v8
1956; RV64-NEXT:    lui a0, 61681
1957; RV64-NEXT:    lui a1, 209715
1958; RV64-NEXT:    lui a2, 349525
1959; RV64-NEXT:    addiw a0, a0, -241
1960; RV64-NEXT:    addiw a1, a1, 819
1961; RV64-NEXT:    addiw a2, a2, 1365
1962; RV64-NEXT:    slli a3, a0, 32
1963; RV64-NEXT:    slli a4, a1, 32
1964; RV64-NEXT:    add a0, a0, a3
1965; RV64-NEXT:    slli a3, a2, 32
1966; RV64-NEXT:    add a1, a1, a4
1967; RV64-NEXT:    add a2, a2, a3
1968; RV64-NEXT:    vor.vv v10, v12, v10
1969; RV64-NEXT:    vor.vv v8, v8, v14
1970; RV64-NEXT:    vor.vv v8, v8, v10
1971; RV64-NEXT:    vsrl.vi v10, v8, 4
1972; RV64-NEXT:    vand.vx v8, v8, a0
1973; RV64-NEXT:    vand.vx v10, v10, a0
1974; RV64-NEXT:    vsll.vi v8, v8, 4
1975; RV64-NEXT:    vor.vv v8, v10, v8
1976; RV64-NEXT:    vsrl.vi v10, v8, 2
1977; RV64-NEXT:    vand.vx v8, v8, a1
1978; RV64-NEXT:    vand.vx v10, v10, a1
1979; RV64-NEXT:    vsll.vi v8, v8, 2
1980; RV64-NEXT:    vor.vv v8, v10, v8
1981; RV64-NEXT:    vsrl.vi v10, v8, 1
1982; RV64-NEXT:    vand.vx v8, v8, a2
1983; RV64-NEXT:    vand.vx v10, v10, a2
1984; RV64-NEXT:    vadd.vv v8, v8, v8
1985; RV64-NEXT:    vor.vv v8, v10, v8
1986; RV64-NEXT:    ret
1987;
1988; CHECK-ZVBB-LABEL: vp_bitreverse_nxv2i64_unmasked:
1989; CHECK-ZVBB:       # %bb.0:
1990; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1991; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
1992; CHECK-ZVBB-NEXT:    ret
1993  %v = call <vscale x 2 x i64> @llvm.vp.bitreverse.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1994  ret <vscale x 2 x i64> %v
1995}
1996
1997declare <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1998
1999define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2000; RV32-LABEL: vp_bitreverse_nxv4i64:
2001; RV32:       # %bb.0:
2002; RV32-NEXT:    addi sp, sp, -16
2003; RV32-NEXT:    .cfi_def_cfa_offset 16
2004; RV32-NEXT:    lui a4, 1044480
2005; RV32-NEXT:    li a3, 56
2006; RV32-NEXT:    lui a5, 16
2007; RV32-NEXT:    li a2, 40
2008; RV32-NEXT:    lui a1, 4080
2009; RV32-NEXT:    addi a6, sp, 8
2010; RV32-NEXT:    sw a4, 8(sp)
2011; RV32-NEXT:    sw zero, 12(sp)
2012; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2013; RV32-NEXT:    vlse64.v v12, (a6), zero
2014; RV32-NEXT:    lui a4, 61681
2015; RV32-NEXT:    vsll.vx v16, v8, a3, v0.t
2016; RV32-NEXT:    addi a5, a5, -256
2017; RV32-NEXT:    vand.vx v20, v8, a5, v0.t
2018; RV32-NEXT:    vsll.vx v20, v20, a2, v0.t
2019; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
2020; RV32-NEXT:    vand.vx v20, v8, a1, v0.t
2021; RV32-NEXT:    vsll.vi v20, v20, 24, v0.t
2022; RV32-NEXT:    vand.vv v24, v8, v12, v0.t
2023; RV32-NEXT:    vsll.vi v24, v24, 8, v0.t
2024; RV32-NEXT:    vor.vv v20, v20, v24, v0.t
2025; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
2026; RV32-NEXT:    vsrl.vx v20, v8, a3, v0.t
2027; RV32-NEXT:    lui a3, 209715
2028; RV32-NEXT:    vsrl.vx v24, v8, a2, v0.t
2029; RV32-NEXT:    lui a2, 349525
2030; RV32-NEXT:    addi a4, a4, -241
2031; RV32-NEXT:    addi a3, a3, 819
2032; RV32-NEXT:    addi a2, a2, 1365
2033; RV32-NEXT:    vand.vx v24, v24, a5, v0.t
2034; RV32-NEXT:    vor.vv v20, v24, v20, v0.t
2035; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
2036; RV32-NEXT:    vand.vx v24, v24, a1, v0.t
2037; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
2038; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
2039; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2040; RV32-NEXT:    vmv.v.x v28, a4
2041; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2042; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
2043; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2044; RV32-NEXT:    vmv.v.x v12, a3
2045; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2046; RV32-NEXT:    vor.vv v20, v8, v20, v0.t
2047; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2048; RV32-NEXT:    vmv.v.x v8, a2
2049; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2050; RV32-NEXT:    vor.vv v16, v16, v20, v0.t
2051; RV32-NEXT:    vsrl.vi v20, v16, 4, v0.t
2052; RV32-NEXT:    vand.vv v20, v20, v28, v0.t
2053; RV32-NEXT:    vand.vv v16, v16, v28, v0.t
2054; RV32-NEXT:    vsll.vi v16, v16, 4, v0.t
2055; RV32-NEXT:    vor.vv v16, v20, v16, v0.t
2056; RV32-NEXT:    vsrl.vi v20, v16, 2, v0.t
2057; RV32-NEXT:    vand.vv v20, v20, v12, v0.t
2058; RV32-NEXT:    vand.vv v12, v16, v12, v0.t
2059; RV32-NEXT:    vsll.vi v12, v12, 2, v0.t
2060; RV32-NEXT:    vor.vv v12, v20, v12, v0.t
2061; RV32-NEXT:    vsrl.vi v16, v12, 1, v0.t
2062; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
2063; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
2064; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
2065; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
2066; RV32-NEXT:    addi sp, sp, 16
2067; RV32-NEXT:    .cfi_def_cfa_offset 0
2068; RV32-NEXT:    ret
2069;
2070; RV64-LABEL: vp_bitreverse_nxv4i64:
2071; RV64:       # %bb.0:
2072; RV64-NEXT:    lui a1, 4080
2073; RV64-NEXT:    li a3, 255
2074; RV64-NEXT:    li a2, 56
2075; RV64-NEXT:    lui a4, 16
2076; RV64-NEXT:    lui a5, 61681
2077; RV64-NEXT:    lui a6, 209715
2078; RV64-NEXT:    lui a7, 349525
2079; RV64-NEXT:    addiw a5, a5, -241
2080; RV64-NEXT:    addiw a6, a6, 819
2081; RV64-NEXT:    addiw a7, a7, 1365
2082; RV64-NEXT:    slli t0, a5, 32
2083; RV64-NEXT:    add t0, a5, t0
2084; RV64-NEXT:    slli a5, a6, 32
2085; RV64-NEXT:    add a6, a6, a5
2086; RV64-NEXT:    slli a5, a7, 32
2087; RV64-NEXT:    add a5, a7, a5
2088; RV64-NEXT:    li a7, 40
2089; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2090; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
2091; RV64-NEXT:    slli a3, a3, 24
2092; RV64-NEXT:    addiw a0, a4, -256
2093; RV64-NEXT:    vsll.vi v12, v12, 24, v0.t
2094; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
2095; RV64-NEXT:    vsll.vi v16, v16, 8, v0.t
2096; RV64-NEXT:    vor.vv v12, v12, v16, v0.t
2097; RV64-NEXT:    vsll.vx v16, v8, a2, v0.t
2098; RV64-NEXT:    vand.vx v20, v8, a0, v0.t
2099; RV64-NEXT:    vsll.vx v20, v20, a7, v0.t
2100; RV64-NEXT:    vor.vv v16, v16, v20, v0.t
2101; RV64-NEXT:    vor.vv v12, v16, v12, v0.t
2102; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
2103; RV64-NEXT:    vsrl.vx v20, v8, a7, v0.t
2104; RV64-NEXT:    vand.vx v20, v20, a0, v0.t
2105; RV64-NEXT:    vor.vv v16, v20, v16, v0.t
2106; RV64-NEXT:    vsrl.vi v20, v8, 24, v0.t
2107; RV64-NEXT:    vand.vx v20, v20, a1, v0.t
2108; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
2109; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
2110; RV64-NEXT:    vor.vv v8, v8, v20, v0.t
2111; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
2112; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
2113; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
2114; RV64-NEXT:    vand.vx v12, v12, t0, v0.t
2115; RV64-NEXT:    vand.vx v8, v8, t0, v0.t
2116; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
2117; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
2118; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
2119; RV64-NEXT:    vand.vx v12, v12, a6, v0.t
2120; RV64-NEXT:    vand.vx v8, v8, a6, v0.t
2121; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
2122; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
2123; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
2124; RV64-NEXT:    vand.vx v12, v12, a5, v0.t
2125; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
2126; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
2127; RV64-NEXT:    vor.vv v8, v12, v8, v0.t
2128; RV64-NEXT:    ret
2129;
2130; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64:
2131; CHECK-ZVBB:       # %bb.0:
2132; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2133; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
2134; CHECK-ZVBB-NEXT:    ret
2135  %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
2136  ret <vscale x 4 x i64> %v
2137}
2138
2139define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
2140; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked:
2141; RV32:       # %bb.0:
2142; RV32-NEXT:    addi sp, sp, -16
2143; RV32-NEXT:    .cfi_def_cfa_offset 16
2144; RV32-NEXT:    lui a1, 1044480
2145; RV32-NEXT:    li a2, 56
2146; RV32-NEXT:    lui a3, 16
2147; RV32-NEXT:    li a4, 40
2148; RV32-NEXT:    lui a5, 4080
2149; RV32-NEXT:    addi a6, sp, 8
2150; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2151; RV32-NEXT:    vsrl.vi v20, v8, 24
2152; RV32-NEXT:    sw a1, 8(sp)
2153; RV32-NEXT:    sw zero, 12(sp)
2154; RV32-NEXT:    vsll.vx v16, v8, a2
2155; RV32-NEXT:    addi a1, a3, -256
2156; RV32-NEXT:    vsrl.vx v12, v8, a2
2157; RV32-NEXT:    vsrl.vx v24, v8, a4
2158; RV32-NEXT:    vand.vx v28, v8, a1
2159; RV32-NEXT:    vand.vx v24, v24, a1
2160; RV32-NEXT:    vor.vv v12, v24, v12
2161; RV32-NEXT:    vlse64.v v24, (a6), zero
2162; RV32-NEXT:    vsll.vx v28, v28, a4
2163; RV32-NEXT:    vor.vv v16, v16, v28
2164; RV32-NEXT:    vsrl.vi v28, v8, 8
2165; RV32-NEXT:    vand.vx v20, v20, a5
2166; RV32-NEXT:    vand.vv v28, v28, v24
2167; RV32-NEXT:    vor.vv v20, v28, v20
2168; RV32-NEXT:    lui a1, 61681
2169; RV32-NEXT:    lui a2, 209715
2170; RV32-NEXT:    lui a3, 349525
2171; RV32-NEXT:    vand.vv v24, v8, v24
2172; RV32-NEXT:    vand.vx v8, v8, a5
2173; RV32-NEXT:    addi a1, a1, -241
2174; RV32-NEXT:    addi a2, a2, 819
2175; RV32-NEXT:    addi a3, a3, 1365
2176; RV32-NEXT:    vsll.vi v8, v8, 24
2177; RV32-NEXT:    vsll.vi v24, v24, 8
2178; RV32-NEXT:    vor.vv v8, v8, v24
2179; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, ma
2180; RV32-NEXT:    vmv.v.x v24, a1
2181; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2182; RV32-NEXT:    vor.vv v12, v20, v12
2183; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2184; RV32-NEXT:    vmv.v.x v20, a2
2185; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2186; RV32-NEXT:    vor.vv v8, v16, v8
2187; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2188; RV32-NEXT:    vmv.v.x v16, a3
2189; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2190; RV32-NEXT:    vor.vv v8, v8, v12
2191; RV32-NEXT:    vsrl.vi v12, v8, 4
2192; RV32-NEXT:    vand.vv v8, v8, v24
2193; RV32-NEXT:    vand.vv v12, v12, v24
2194; RV32-NEXT:    vsll.vi v8, v8, 4
2195; RV32-NEXT:    vor.vv v8, v12, v8
2196; RV32-NEXT:    vsrl.vi v12, v8, 2
2197; RV32-NEXT:    vand.vv v8, v8, v20
2198; RV32-NEXT:    vand.vv v12, v12, v20
2199; RV32-NEXT:    vsll.vi v8, v8, 2
2200; RV32-NEXT:    vor.vv v8, v12, v8
2201; RV32-NEXT:    vsrl.vi v12, v8, 1
2202; RV32-NEXT:    vand.vv v8, v8, v16
2203; RV32-NEXT:    vand.vv v12, v12, v16
2204; RV32-NEXT:    vadd.vv v8, v8, v8
2205; RV32-NEXT:    vor.vv v8, v12, v8
2206; RV32-NEXT:    addi sp, sp, 16
2207; RV32-NEXT:    .cfi_def_cfa_offset 0
2208; RV32-NEXT:    ret
2209;
2210; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked:
2211; RV64:       # %bb.0:
2212; RV64-NEXT:    lui a1, 4080
2213; RV64-NEXT:    li a2, 255
2214; RV64-NEXT:    li a3, 56
2215; RV64-NEXT:    lui a4, 16
2216; RV64-NEXT:    li a5, 40
2217; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2218; RV64-NEXT:    vsrl.vi v16, v8, 24
2219; RV64-NEXT:    vsrl.vi v20, v8, 8
2220; RV64-NEXT:    addiw a0, a4, -256
2221; RV64-NEXT:    vsrl.vx v12, v8, a3
2222; RV64-NEXT:    vsrl.vx v24, v8, a5
2223; RV64-NEXT:    vand.vx v24, v24, a0
2224; RV64-NEXT:    vor.vv v12, v24, v12
2225; RV64-NEXT:    vand.vx v24, v8, a1
2226; RV64-NEXT:    slli a2, a2, 24
2227; RV64-NEXT:    vand.vx v16, v16, a1
2228; RV64-NEXT:    vsll.vi v24, v24, 24
2229; RV64-NEXT:    vand.vx v20, v20, a2
2230; RV64-NEXT:    vor.vv v16, v20, v16
2231; RV64-NEXT:    vand.vx v20, v8, a2
2232; RV64-NEXT:    vsll.vi v20, v20, 8
2233; RV64-NEXT:    vor.vv v20, v24, v20
2234; RV64-NEXT:    vsll.vx v24, v8, a3
2235; RV64-NEXT:    vand.vx v8, v8, a0
2236; RV64-NEXT:    vsll.vx v8, v8, a5
2237; RV64-NEXT:    vor.vv v8, v24, v8
2238; RV64-NEXT:    lui a0, 61681
2239; RV64-NEXT:    lui a1, 209715
2240; RV64-NEXT:    lui a2, 349525
2241; RV64-NEXT:    addiw a0, a0, -241
2242; RV64-NEXT:    addiw a1, a1, 819
2243; RV64-NEXT:    addiw a2, a2, 1365
2244; RV64-NEXT:    slli a3, a0, 32
2245; RV64-NEXT:    slli a4, a1, 32
2246; RV64-NEXT:    add a0, a0, a3
2247; RV64-NEXT:    slli a3, a2, 32
2248; RV64-NEXT:    add a1, a1, a4
2249; RV64-NEXT:    add a2, a2, a3
2250; RV64-NEXT:    vor.vv v12, v16, v12
2251; RV64-NEXT:    vor.vv v8, v8, v20
2252; RV64-NEXT:    vor.vv v8, v8, v12
2253; RV64-NEXT:    vsrl.vi v12, v8, 4
2254; RV64-NEXT:    vand.vx v8, v8, a0
2255; RV64-NEXT:    vand.vx v12, v12, a0
2256; RV64-NEXT:    vsll.vi v8, v8, 4
2257; RV64-NEXT:    vor.vv v8, v12, v8
2258; RV64-NEXT:    vsrl.vi v12, v8, 2
2259; RV64-NEXT:    vand.vx v8, v8, a1
2260; RV64-NEXT:    vand.vx v12, v12, a1
2261; RV64-NEXT:    vsll.vi v8, v8, 2
2262; RV64-NEXT:    vor.vv v8, v12, v8
2263; RV64-NEXT:    vsrl.vi v12, v8, 1
2264; RV64-NEXT:    vand.vx v8, v8, a2
2265; RV64-NEXT:    vand.vx v12, v12, a2
2266; RV64-NEXT:    vadd.vv v8, v8, v8
2267; RV64-NEXT:    vor.vv v8, v12, v8
2268; RV64-NEXT:    ret
2269;
2270; CHECK-ZVBB-LABEL: vp_bitreverse_nxv4i64_unmasked:
2271; CHECK-ZVBB:       # %bb.0:
2272; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2273; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
2274; CHECK-ZVBB-NEXT:    ret
2275  %v = call <vscale x 4 x i64> @llvm.vp.bitreverse.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
2276  ret <vscale x 4 x i64> %v
2277}
2278
2279declare <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32)
2280
2281define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
2282; RV32-LABEL: vp_bitreverse_nxv7i64:
2283; RV32:       # %bb.0:
2284; RV32-NEXT:    addi sp, sp, -16
2285; RV32-NEXT:    .cfi_def_cfa_offset 16
2286; RV32-NEXT:    csrr a1, vlenb
2287; RV32-NEXT:    li a2, 24
2288; RV32-NEXT:    mul a1, a1, a2
2289; RV32-NEXT:    sub sp, sp, a1
2290; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
2291; RV32-NEXT:    lui a1, 1044480
2292; RV32-NEXT:    li a2, 56
2293; RV32-NEXT:    lui a3, 16
2294; RV32-NEXT:    li a4, 40
2295; RV32-NEXT:    addi a5, sp, 8
2296; RV32-NEXT:    sw a1, 8(sp)
2297; RV32-NEXT:    sw zero, 12(sp)
2298; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2299; RV32-NEXT:    vsll.vx v16, v8, a2, v0.t
2300; RV32-NEXT:    addi a1, a3, -256
2301; RV32-NEXT:    vand.vx v24, v8, a1, v0.t
2302; RV32-NEXT:    vsll.vx v24, v24, a4, v0.t
2303; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
2304; RV32-NEXT:    csrr a3, vlenb
2305; RV32-NEXT:    slli a3, a3, 4
2306; RV32-NEXT:    add a3, sp, a3
2307; RV32-NEXT:    addi a3, a3, 16
2308; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2309; RV32-NEXT:    vlse64.v v16, (a5), zero
2310; RV32-NEXT:    csrr a3, vlenb
2311; RV32-NEXT:    slli a3, a3, 3
2312; RV32-NEXT:    add a3, sp, a3
2313; RV32-NEXT:    addi a3, a3, 16
2314; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2315; RV32-NEXT:    lui a3, 4080
2316; RV32-NEXT:    vand.vx v24, v8, a3, v0.t
2317; RV32-NEXT:    vsll.vi v24, v24, 24, v0.t
2318; RV32-NEXT:    addi a5, sp, 16
2319; RV32-NEXT:    vs8r.v v24, (a5) # Unknown-size Folded Spill
2320; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
2321; RV32-NEXT:    vsll.vi v16, v24, 8, v0.t
2322; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
2323; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
2324; RV32-NEXT:    csrr a5, vlenb
2325; RV32-NEXT:    slli a5, a5, 4
2326; RV32-NEXT:    add a5, sp, a5
2327; RV32-NEXT:    addi a5, a5, 16
2328; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
2329; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
2330; RV32-NEXT:    csrr a5, vlenb
2331; RV32-NEXT:    slli a5, a5, 4
2332; RV32-NEXT:    add a5, sp, a5
2333; RV32-NEXT:    addi a5, a5, 16
2334; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
2335; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
2336; RV32-NEXT:    vsrl.vx v24, v8, a4, v0.t
2337; RV32-NEXT:    vand.vx v24, v24, a1, v0.t
2338; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
2339; RV32-NEXT:    addi a1, sp, 16
2340; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2341; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
2342; RV32-NEXT:    vand.vx v24, v24, a3, v0.t
2343; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
2344; RV32-NEXT:    csrr a1, vlenb
2345; RV32-NEXT:    slli a1, a1, 3
2346; RV32-NEXT:    add a1, sp, a1
2347; RV32-NEXT:    addi a1, a1, 16
2348; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2349; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
2350; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
2351; RV32-NEXT:    addi a1, sp, 16
2352; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2353; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
2354; RV32-NEXT:    lui a1, 61681
2355; RV32-NEXT:    lui a2, 209715
2356; RV32-NEXT:    lui a3, 349525
2357; RV32-NEXT:    addi a1, a1, -241
2358; RV32-NEXT:    addi a2, a2, 819
2359; RV32-NEXT:    addi a3, a3, 1365
2360; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
2361; RV32-NEXT:    vmv.v.x v24, a1
2362; RV32-NEXT:    csrr a1, vlenb
2363; RV32-NEXT:    slli a1, a1, 4
2364; RV32-NEXT:    add a1, sp, a1
2365; RV32-NEXT:    addi a1, a1, 16
2366; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2367; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2368; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
2369; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
2370; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
2371; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
2372; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2373; RV32-NEXT:    vmv.v.x v8, a2
2374; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2375; RV32-NEXT:    vsll.vi v24, v24, 4, v0.t
2376; RV32-NEXT:    vor.vv v24, v16, v24, v0.t
2377; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
2378; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
2379; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
2380; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2381; RV32-NEXT:    vmv.v.x v8, a3
2382; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2383; RV32-NEXT:    vsll.vi v24, v24, 2, v0.t
2384; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
2385; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
2386; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
2387; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
2388; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
2389; RV32-NEXT:    vor.vv v8, v24, v8, v0.t
2390; RV32-NEXT:    csrr a0, vlenb
2391; RV32-NEXT:    li a1, 24
2392; RV32-NEXT:    mul a0, a0, a1
2393; RV32-NEXT:    add sp, sp, a0
2394; RV32-NEXT:    .cfi_def_cfa sp, 16
2395; RV32-NEXT:    addi sp, sp, 16
2396; RV32-NEXT:    .cfi_def_cfa_offset 0
2397; RV32-NEXT:    ret
2398;
2399; RV64-LABEL: vp_bitreverse_nxv7i64:
2400; RV64:       # %bb.0:
2401; RV64-NEXT:    addi sp, sp, -16
2402; RV64-NEXT:    .cfi_def_cfa_offset 16
2403; RV64-NEXT:    csrr a1, vlenb
2404; RV64-NEXT:    slli a1, a1, 3
2405; RV64-NEXT:    sub sp, sp, a1
2406; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2407; RV64-NEXT:    lui a1, 4080
2408; RV64-NEXT:    li a2, 255
2409; RV64-NEXT:    li a3, 56
2410; RV64-NEXT:    lui a4, 16
2411; RV64-NEXT:    li a5, 40
2412; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2413; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
2414; RV64-NEXT:    slli a2, a2, 24
2415; RV64-NEXT:    addiw a0, a4, -256
2416; RV64-NEXT:    vsll.vi v16, v16, 24, v0.t
2417; RV64-NEXT:    vand.vx v24, v8, a2, v0.t
2418; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
2419; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
2420; RV64-NEXT:    addi a4, sp, 16
2421; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2422; RV64-NEXT:    vsll.vx v24, v8, a3, v0.t
2423; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
2424; RV64-NEXT:    vsll.vx v16, v16, a5, v0.t
2425; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
2426; RV64-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
2427; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
2428; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2429; RV64-NEXT:    vsrl.vx v24, v8, a3, v0.t
2430; RV64-NEXT:    vsrl.vx v16, v8, a5, v0.t
2431; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
2432; RV64-NEXT:    vor.vv v24, v16, v24, v0.t
2433; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
2434; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
2435; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
2436; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
2437; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
2438; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
2439; RV64-NEXT:    lui a0, 61681
2440; RV64-NEXT:    lui a1, 209715
2441; RV64-NEXT:    lui a2, 349525
2442; RV64-NEXT:    addiw a0, a0, -241
2443; RV64-NEXT:    addiw a1, a1, 819
2444; RV64-NEXT:    addiw a2, a2, 1365
2445; RV64-NEXT:    slli a3, a0, 32
2446; RV64-NEXT:    slli a4, a1, 32
2447; RV64-NEXT:    add a0, a0, a3
2448; RV64-NEXT:    slli a3, a2, 32
2449; RV64-NEXT:    add a1, a1, a4
2450; RV64-NEXT:    add a2, a2, a3
2451; RV64-NEXT:    addi a3, sp, 16
2452; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
2453; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2454; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
2455; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
2456; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
2457; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
2458; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2459; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
2460; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
2461; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
2462; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
2463; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2464; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
2465; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
2466; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
2467; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
2468; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2469; RV64-NEXT:    csrr a0, vlenb
2470; RV64-NEXT:    slli a0, a0, 3
2471; RV64-NEXT:    add sp, sp, a0
2472; RV64-NEXT:    .cfi_def_cfa sp, 16
2473; RV64-NEXT:    addi sp, sp, 16
2474; RV64-NEXT:    .cfi_def_cfa_offset 0
2475; RV64-NEXT:    ret
2476;
2477; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64:
2478; CHECK-ZVBB:       # %bb.0:
2479; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2480; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
2481; CHECK-ZVBB-NEXT:    ret
2482  %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl)
2483  ret <vscale x 7 x i64> %v
2484}
2485
2486define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
2487; RV32-LABEL: vp_bitreverse_nxv7i64_unmasked:
2488; RV32:       # %bb.0:
2489; RV32-NEXT:    addi sp, sp, -16
2490; RV32-NEXT:    .cfi_def_cfa_offset 16
2491; RV32-NEXT:    csrr a1, vlenb
2492; RV32-NEXT:    slli a1, a1, 4
2493; RV32-NEXT:    sub sp, sp, a1
2494; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2495; RV32-NEXT:    lui a1, 1044480
2496; RV32-NEXT:    li a2, 56
2497; RV32-NEXT:    lui a3, 16
2498; RV32-NEXT:    li a4, 40
2499; RV32-NEXT:    lui a5, 4080
2500; RV32-NEXT:    addi a6, sp, 8
2501; RV32-NEXT:    sw a1, 8(sp)
2502; RV32-NEXT:    sw zero, 12(sp)
2503; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2504; RV32-NEXT:    vsll.vx v16, v8, a2
2505; RV32-NEXT:    addi a1, a3, -256
2506; RV32-NEXT:    vsrl.vx v24, v8, a2
2507; RV32-NEXT:    vsrl.vx v0, v8, a4
2508; RV32-NEXT:    vand.vx v0, v0, a1
2509; RV32-NEXT:    vor.vv v24, v0, v24
2510; RV32-NEXT:    addi a2, sp, 16
2511; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
2512; RV32-NEXT:    vand.vx v24, v8, a1
2513; RV32-NEXT:    vsll.vx v24, v24, a4
2514; RV32-NEXT:    vor.vv v16, v16, v24
2515; RV32-NEXT:    csrr a1, vlenb
2516; RV32-NEXT:    slli a1, a1, 3
2517; RV32-NEXT:    add a1, sp, a1
2518; RV32-NEXT:    addi a1, a1, 16
2519; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2520; RV32-NEXT:    vlse64.v v24, (a6), zero
2521; RV32-NEXT:    vsrl.vi v16, v8, 24
2522; RV32-NEXT:    vand.vx v16, v16, a5
2523; RV32-NEXT:    vsrl.vi v0, v8, 8
2524; RV32-NEXT:    vand.vv v0, v0, v24
2525; RV32-NEXT:    vor.vv v16, v0, v16
2526; RV32-NEXT:    vand.vv v24, v8, v24
2527; RV32-NEXT:    vand.vx v8, v8, a5
2528; RV32-NEXT:    vsll.vi v8, v8, 24
2529; RV32-NEXT:    vsll.vi v24, v24, 8
2530; RV32-NEXT:    vor.vv v24, v8, v24
2531; RV32-NEXT:    addi a1, sp, 16
2532; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
2533; RV32-NEXT:    vor.vv v8, v16, v8
2534; RV32-NEXT:    lui a1, 61681
2535; RV32-NEXT:    lui a2, 209715
2536; RV32-NEXT:    lui a3, 349525
2537; RV32-NEXT:    addi a1, a1, -241
2538; RV32-NEXT:    addi a2, a2, 819
2539; RV32-NEXT:    addi a3, a3, 1365
2540; RV32-NEXT:    csrr a4, vlenb
2541; RV32-NEXT:    slli a4, a4, 3
2542; RV32-NEXT:    add a4, sp, a4
2543; RV32-NEXT:    addi a4, a4, 16
2544; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
2545; RV32-NEXT:    vor.vv v16, v16, v24
2546; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
2547; RV32-NEXT:    vmv.v.x v24, a1
2548; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2549; RV32-NEXT:    vor.vv v8, v16, v8
2550; RV32-NEXT:    vsrl.vi v16, v8, 4
2551; RV32-NEXT:    vand.vv v8, v8, v24
2552; RV32-NEXT:    vand.vv v16, v16, v24
2553; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2554; RV32-NEXT:    vmv.v.x v24, a2
2555; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2556; RV32-NEXT:    vsll.vi v8, v8, 4
2557; RV32-NEXT:    vor.vv v8, v16, v8
2558; RV32-NEXT:    vsrl.vi v16, v8, 2
2559; RV32-NEXT:    vand.vv v8, v8, v24
2560; RV32-NEXT:    vand.vv v16, v16, v24
2561; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2562; RV32-NEXT:    vmv.v.x v24, a3
2563; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2564; RV32-NEXT:    vsll.vi v8, v8, 2
2565; RV32-NEXT:    vor.vv v8, v16, v8
2566; RV32-NEXT:    vsrl.vi v16, v8, 1
2567; RV32-NEXT:    vand.vv v8, v8, v24
2568; RV32-NEXT:    vand.vv v16, v16, v24
2569; RV32-NEXT:    vadd.vv v8, v8, v8
2570; RV32-NEXT:    vor.vv v8, v16, v8
2571; RV32-NEXT:    csrr a0, vlenb
2572; RV32-NEXT:    slli a0, a0, 4
2573; RV32-NEXT:    add sp, sp, a0
2574; RV32-NEXT:    .cfi_def_cfa sp, 16
2575; RV32-NEXT:    addi sp, sp, 16
2576; RV32-NEXT:    .cfi_def_cfa_offset 0
2577; RV32-NEXT:    ret
2578;
2579; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked:
2580; RV64:       # %bb.0:
2581; RV64-NEXT:    addi sp, sp, -16
2582; RV64-NEXT:    .cfi_def_cfa_offset 16
2583; RV64-NEXT:    csrr a1, vlenb
2584; RV64-NEXT:    slli a1, a1, 3
2585; RV64-NEXT:    sub sp, sp, a1
2586; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2587; RV64-NEXT:    lui a1, 4080
2588; RV64-NEXT:    li a2, 255
2589; RV64-NEXT:    li a3, 56
2590; RV64-NEXT:    lui a4, 16
2591; RV64-NEXT:    li a5, 40
2592; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2593; RV64-NEXT:    vsrl.vi v24, v8, 24
2594; RV64-NEXT:    addiw a0, a4, -256
2595; RV64-NEXT:    vsrl.vx v16, v8, a3
2596; RV64-NEXT:    vsrl.vx v0, v8, a5
2597; RV64-NEXT:    vand.vx v0, v0, a0
2598; RV64-NEXT:    vor.vv v16, v0, v16
2599; RV64-NEXT:    addi a4, sp, 16
2600; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2601; RV64-NEXT:    vsrl.vi v0, v8, 8
2602; RV64-NEXT:    slli a2, a2, 24
2603; RV64-NEXT:    vand.vx v24, v24, a1
2604; RV64-NEXT:    vand.vx v0, v0, a2
2605; RV64-NEXT:    vor.vv v24, v0, v24
2606; RV64-NEXT:    vand.vx v0, v8, a1
2607; RV64-NEXT:    vsll.vi v0, v0, 24
2608; RV64-NEXT:    vand.vx v16, v8, a2
2609; RV64-NEXT:    vsll.vi v16, v16, 8
2610; RV64-NEXT:    vor.vv v0, v0, v16
2611; RV64-NEXT:    vsll.vx v16, v8, a3
2612; RV64-NEXT:    vand.vx v8, v8, a0
2613; RV64-NEXT:    vsll.vx v8, v8, a5
2614; RV64-NEXT:    vor.vv v8, v16, v8
2615; RV64-NEXT:    lui a0, 61681
2616; RV64-NEXT:    lui a1, 209715
2617; RV64-NEXT:    lui a2, 349525
2618; RV64-NEXT:    addiw a0, a0, -241
2619; RV64-NEXT:    addiw a1, a1, 819
2620; RV64-NEXT:    addiw a2, a2, 1365
2621; RV64-NEXT:    slli a3, a0, 32
2622; RV64-NEXT:    slli a4, a1, 32
2623; RV64-NEXT:    add a0, a0, a3
2624; RV64-NEXT:    slli a3, a2, 32
2625; RV64-NEXT:    add a1, a1, a4
2626; RV64-NEXT:    add a2, a2, a3
2627; RV64-NEXT:    addi a3, sp, 16
2628; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
2629; RV64-NEXT:    vor.vv v16, v24, v16
2630; RV64-NEXT:    vor.vv v8, v8, v0
2631; RV64-NEXT:    vor.vv v8, v8, v16
2632; RV64-NEXT:    vsrl.vi v16, v8, 4
2633; RV64-NEXT:    vand.vx v8, v8, a0
2634; RV64-NEXT:    vand.vx v16, v16, a0
2635; RV64-NEXT:    vsll.vi v8, v8, 4
2636; RV64-NEXT:    vor.vv v8, v16, v8
2637; RV64-NEXT:    vsrl.vi v16, v8, 2
2638; RV64-NEXT:    vand.vx v8, v8, a1
2639; RV64-NEXT:    vand.vx v16, v16, a1
2640; RV64-NEXT:    vsll.vi v8, v8, 2
2641; RV64-NEXT:    vor.vv v8, v16, v8
2642; RV64-NEXT:    vsrl.vi v16, v8, 1
2643; RV64-NEXT:    vand.vx v8, v8, a2
2644; RV64-NEXT:    vand.vx v16, v16, a2
2645; RV64-NEXT:    vadd.vv v8, v8, v8
2646; RV64-NEXT:    vor.vv v8, v16, v8
2647; RV64-NEXT:    csrr a0, vlenb
2648; RV64-NEXT:    slli a0, a0, 3
2649; RV64-NEXT:    add sp, sp, a0
2650; RV64-NEXT:    .cfi_def_cfa sp, 16
2651; RV64-NEXT:    addi sp, sp, 16
2652; RV64-NEXT:    .cfi_def_cfa_offset 0
2653; RV64-NEXT:    ret
2654;
2655; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked:
2656; CHECK-ZVBB:       # %bb.0:
2657; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2658; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
2659; CHECK-ZVBB-NEXT:    ret
2660  %v = call <vscale x 7 x i64> @llvm.vp.bitreverse.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
2661  ret <vscale x 7 x i64> %v
2662}
2663
2664declare <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
2665
2666define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2667; RV32-LABEL: vp_bitreverse_nxv8i64:
2668; RV32:       # %bb.0:
2669; RV32-NEXT:    addi sp, sp, -16
2670; RV32-NEXT:    .cfi_def_cfa_offset 16
2671; RV32-NEXT:    csrr a1, vlenb
2672; RV32-NEXT:    li a2, 24
2673; RV32-NEXT:    mul a1, a1, a2
2674; RV32-NEXT:    sub sp, sp, a1
2675; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
2676; RV32-NEXT:    lui a1, 1044480
2677; RV32-NEXT:    li a2, 56
2678; RV32-NEXT:    lui a3, 16
2679; RV32-NEXT:    li a4, 40
2680; RV32-NEXT:    addi a5, sp, 8
2681; RV32-NEXT:    sw a1, 8(sp)
2682; RV32-NEXT:    sw zero, 12(sp)
2683; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2684; RV32-NEXT:    vsll.vx v16, v8, a2, v0.t
2685; RV32-NEXT:    addi a1, a3, -256
2686; RV32-NEXT:    vand.vx v24, v8, a1, v0.t
2687; RV32-NEXT:    vsll.vx v24, v24, a4, v0.t
2688; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
2689; RV32-NEXT:    csrr a3, vlenb
2690; RV32-NEXT:    slli a3, a3, 4
2691; RV32-NEXT:    add a3, sp, a3
2692; RV32-NEXT:    addi a3, a3, 16
2693; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2694; RV32-NEXT:    vlse64.v v16, (a5), zero
2695; RV32-NEXT:    csrr a3, vlenb
2696; RV32-NEXT:    slli a3, a3, 3
2697; RV32-NEXT:    add a3, sp, a3
2698; RV32-NEXT:    addi a3, a3, 16
2699; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2700; RV32-NEXT:    lui a3, 4080
2701; RV32-NEXT:    vand.vx v24, v8, a3, v0.t
2702; RV32-NEXT:    vsll.vi v24, v24, 24, v0.t
2703; RV32-NEXT:    addi a5, sp, 16
2704; RV32-NEXT:    vs8r.v v24, (a5) # Unknown-size Folded Spill
2705; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
2706; RV32-NEXT:    vsll.vi v16, v24, 8, v0.t
2707; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
2708; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
2709; RV32-NEXT:    csrr a5, vlenb
2710; RV32-NEXT:    slli a5, a5, 4
2711; RV32-NEXT:    add a5, sp, a5
2712; RV32-NEXT:    addi a5, a5, 16
2713; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
2714; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
2715; RV32-NEXT:    csrr a5, vlenb
2716; RV32-NEXT:    slli a5, a5, 4
2717; RV32-NEXT:    add a5, sp, a5
2718; RV32-NEXT:    addi a5, a5, 16
2719; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
2720; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
2721; RV32-NEXT:    vsrl.vx v24, v8, a4, v0.t
2722; RV32-NEXT:    vand.vx v24, v24, a1, v0.t
2723; RV32-NEXT:    vor.vv v16, v24, v16, v0.t
2724; RV32-NEXT:    addi a1, sp, 16
2725; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2726; RV32-NEXT:    vsrl.vi v24, v8, 24, v0.t
2727; RV32-NEXT:    vand.vx v24, v24, a3, v0.t
2728; RV32-NEXT:    vsrl.vi v8, v8, 8, v0.t
2729; RV32-NEXT:    csrr a1, vlenb
2730; RV32-NEXT:    slli a1, a1, 3
2731; RV32-NEXT:    add a1, sp, a1
2732; RV32-NEXT:    addi a1, a1, 16
2733; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2734; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
2735; RV32-NEXT:    vor.vv v8, v8, v24, v0.t
2736; RV32-NEXT:    addi a1, sp, 16
2737; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2738; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
2739; RV32-NEXT:    lui a1, 61681
2740; RV32-NEXT:    lui a2, 209715
2741; RV32-NEXT:    lui a3, 349525
2742; RV32-NEXT:    addi a1, a1, -241
2743; RV32-NEXT:    addi a2, a2, 819
2744; RV32-NEXT:    addi a3, a3, 1365
2745; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
2746; RV32-NEXT:    vmv.v.x v24, a1
2747; RV32-NEXT:    csrr a1, vlenb
2748; RV32-NEXT:    slli a1, a1, 4
2749; RV32-NEXT:    add a1, sp, a1
2750; RV32-NEXT:    addi a1, a1, 16
2751; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2752; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2753; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
2754; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
2755; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
2756; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
2757; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2758; RV32-NEXT:    vmv.v.x v8, a2
2759; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2760; RV32-NEXT:    vsll.vi v24, v24, 4, v0.t
2761; RV32-NEXT:    vor.vv v24, v16, v24, v0.t
2762; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
2763; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
2764; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
2765; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2766; RV32-NEXT:    vmv.v.x v8, a3
2767; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2768; RV32-NEXT:    vsll.vi v24, v24, 2, v0.t
2769; RV32-NEXT:    vor.vv v16, v16, v24, v0.t
2770; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
2771; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
2772; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
2773; RV32-NEXT:    vsll.vi v8, v8, 1, v0.t
2774; RV32-NEXT:    vor.vv v8, v24, v8, v0.t
2775; RV32-NEXT:    csrr a0, vlenb
2776; RV32-NEXT:    li a1, 24
2777; RV32-NEXT:    mul a0, a0, a1
2778; RV32-NEXT:    add sp, sp, a0
2779; RV32-NEXT:    .cfi_def_cfa sp, 16
2780; RV32-NEXT:    addi sp, sp, 16
2781; RV32-NEXT:    .cfi_def_cfa_offset 0
2782; RV32-NEXT:    ret
2783;
2784; RV64-LABEL: vp_bitreverse_nxv8i64:
2785; RV64:       # %bb.0:
2786; RV64-NEXT:    addi sp, sp, -16
2787; RV64-NEXT:    .cfi_def_cfa_offset 16
2788; RV64-NEXT:    csrr a1, vlenb
2789; RV64-NEXT:    slli a1, a1, 3
2790; RV64-NEXT:    sub sp, sp, a1
2791; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2792; RV64-NEXT:    lui a1, 4080
2793; RV64-NEXT:    li a2, 255
2794; RV64-NEXT:    li a3, 56
2795; RV64-NEXT:    lui a4, 16
2796; RV64-NEXT:    li a5, 40
2797; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2798; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
2799; RV64-NEXT:    slli a2, a2, 24
2800; RV64-NEXT:    addiw a0, a4, -256
2801; RV64-NEXT:    vsll.vi v16, v16, 24, v0.t
2802; RV64-NEXT:    vand.vx v24, v8, a2, v0.t
2803; RV64-NEXT:    vsll.vi v24, v24, 8, v0.t
2804; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
2805; RV64-NEXT:    addi a4, sp, 16
2806; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2807; RV64-NEXT:    vsll.vx v24, v8, a3, v0.t
2808; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
2809; RV64-NEXT:    vsll.vx v16, v16, a5, v0.t
2810; RV64-NEXT:    vor.vv v16, v24, v16, v0.t
2811; RV64-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
2812; RV64-NEXT:    vor.vv v16, v16, v24, v0.t
2813; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2814; RV64-NEXT:    vsrl.vx v24, v8, a3, v0.t
2815; RV64-NEXT:    vsrl.vx v16, v8, a5, v0.t
2816; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
2817; RV64-NEXT:    vor.vv v24, v16, v24, v0.t
2818; RV64-NEXT:    vsrl.vi v16, v8, 24, v0.t
2819; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
2820; RV64-NEXT:    vsrl.vi v8, v8, 8, v0.t
2821; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
2822; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
2823; RV64-NEXT:    vor.vv v8, v8, v24, v0.t
2824; RV64-NEXT:    lui a0, 61681
2825; RV64-NEXT:    lui a1, 209715
2826; RV64-NEXT:    lui a2, 349525
2827; RV64-NEXT:    addiw a0, a0, -241
2828; RV64-NEXT:    addiw a1, a1, 819
2829; RV64-NEXT:    addiw a2, a2, 1365
2830; RV64-NEXT:    slli a3, a0, 32
2831; RV64-NEXT:    slli a4, a1, 32
2832; RV64-NEXT:    add a0, a0, a3
2833; RV64-NEXT:    slli a3, a2, 32
2834; RV64-NEXT:    add a1, a1, a4
2835; RV64-NEXT:    add a2, a2, a3
2836; RV64-NEXT:    addi a3, sp, 16
2837; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
2838; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2839; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
2840; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
2841; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
2842; RV64-NEXT:    vsll.vi v8, v8, 4, v0.t
2843; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2844; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
2845; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
2846; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
2847; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
2848; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2849; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
2850; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
2851; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
2852; RV64-NEXT:    vsll.vi v8, v8, 1, v0.t
2853; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
2854; RV64-NEXT:    csrr a0, vlenb
2855; RV64-NEXT:    slli a0, a0, 3
2856; RV64-NEXT:    add sp, sp, a0
2857; RV64-NEXT:    .cfi_def_cfa sp, 16
2858; RV64-NEXT:    addi sp, sp, 16
2859; RV64-NEXT:    .cfi_def_cfa_offset 0
2860; RV64-NEXT:    ret
2861;
2862; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64:
2863; CHECK-ZVBB:       # %bb.0:
2864; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2865; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
2866; CHECK-ZVBB-NEXT:    ret
2867  %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
2868  ret <vscale x 8 x i64> %v
2869}
2870
2871define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
2872; RV32-LABEL: vp_bitreverse_nxv8i64_unmasked:
2873; RV32:       # %bb.0:
2874; RV32-NEXT:    addi sp, sp, -16
2875; RV32-NEXT:    .cfi_def_cfa_offset 16
2876; RV32-NEXT:    csrr a1, vlenb
2877; RV32-NEXT:    slli a1, a1, 4
2878; RV32-NEXT:    sub sp, sp, a1
2879; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2880; RV32-NEXT:    lui a1, 1044480
2881; RV32-NEXT:    li a2, 56
2882; RV32-NEXT:    lui a3, 16
2883; RV32-NEXT:    li a4, 40
2884; RV32-NEXT:    lui a5, 4080
2885; RV32-NEXT:    addi a6, sp, 8
2886; RV32-NEXT:    sw a1, 8(sp)
2887; RV32-NEXT:    sw zero, 12(sp)
2888; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2889; RV32-NEXT:    vsll.vx v16, v8, a2
2890; RV32-NEXT:    addi a1, a3, -256
2891; RV32-NEXT:    vsrl.vx v24, v8, a2
2892; RV32-NEXT:    vsrl.vx v0, v8, a4
2893; RV32-NEXT:    vand.vx v0, v0, a1
2894; RV32-NEXT:    vor.vv v24, v0, v24
2895; RV32-NEXT:    addi a2, sp, 16
2896; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
2897; RV32-NEXT:    vand.vx v24, v8, a1
2898; RV32-NEXT:    vsll.vx v24, v24, a4
2899; RV32-NEXT:    vor.vv v16, v16, v24
2900; RV32-NEXT:    csrr a1, vlenb
2901; RV32-NEXT:    slli a1, a1, 3
2902; RV32-NEXT:    add a1, sp, a1
2903; RV32-NEXT:    addi a1, a1, 16
2904; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2905; RV32-NEXT:    vlse64.v v24, (a6), zero
2906; RV32-NEXT:    vsrl.vi v16, v8, 24
2907; RV32-NEXT:    vand.vx v16, v16, a5
2908; RV32-NEXT:    vsrl.vi v0, v8, 8
2909; RV32-NEXT:    vand.vv v0, v0, v24
2910; RV32-NEXT:    vor.vv v16, v0, v16
2911; RV32-NEXT:    vand.vv v24, v8, v24
2912; RV32-NEXT:    vand.vx v8, v8, a5
2913; RV32-NEXT:    vsll.vi v8, v8, 24
2914; RV32-NEXT:    vsll.vi v24, v24, 8
2915; RV32-NEXT:    vor.vv v24, v8, v24
2916; RV32-NEXT:    addi a1, sp, 16
2917; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
2918; RV32-NEXT:    vor.vv v8, v16, v8
2919; RV32-NEXT:    lui a1, 61681
2920; RV32-NEXT:    lui a2, 209715
2921; RV32-NEXT:    lui a3, 349525
2922; RV32-NEXT:    addi a1, a1, -241
2923; RV32-NEXT:    addi a2, a2, 819
2924; RV32-NEXT:    addi a3, a3, 1365
2925; RV32-NEXT:    csrr a4, vlenb
2926; RV32-NEXT:    slli a4, a4, 3
2927; RV32-NEXT:    add a4, sp, a4
2928; RV32-NEXT:    addi a4, a4, 16
2929; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
2930; RV32-NEXT:    vor.vv v16, v16, v24
2931; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
2932; RV32-NEXT:    vmv.v.x v24, a1
2933; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2934; RV32-NEXT:    vor.vv v8, v16, v8
2935; RV32-NEXT:    vsrl.vi v16, v8, 4
2936; RV32-NEXT:    vand.vv v8, v8, v24
2937; RV32-NEXT:    vand.vv v16, v16, v24
2938; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2939; RV32-NEXT:    vmv.v.x v24, a2
2940; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2941; RV32-NEXT:    vsll.vi v8, v8, 4
2942; RV32-NEXT:    vor.vv v8, v16, v8
2943; RV32-NEXT:    vsrl.vi v16, v8, 2
2944; RV32-NEXT:    vand.vv v8, v8, v24
2945; RV32-NEXT:    vand.vv v16, v16, v24
2946; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2947; RV32-NEXT:    vmv.v.x v24, a3
2948; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2949; RV32-NEXT:    vsll.vi v8, v8, 2
2950; RV32-NEXT:    vor.vv v8, v16, v8
2951; RV32-NEXT:    vsrl.vi v16, v8, 1
2952; RV32-NEXT:    vand.vv v8, v8, v24
2953; RV32-NEXT:    vand.vv v16, v16, v24
2954; RV32-NEXT:    vadd.vv v8, v8, v8
2955; RV32-NEXT:    vor.vv v8, v16, v8
2956; RV32-NEXT:    csrr a0, vlenb
2957; RV32-NEXT:    slli a0, a0, 4
2958; RV32-NEXT:    add sp, sp, a0
2959; RV32-NEXT:    .cfi_def_cfa sp, 16
2960; RV32-NEXT:    addi sp, sp, 16
2961; RV32-NEXT:    .cfi_def_cfa_offset 0
2962; RV32-NEXT:    ret
2963;
2964; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked:
2965; RV64:       # %bb.0:
2966; RV64-NEXT:    addi sp, sp, -16
2967; RV64-NEXT:    .cfi_def_cfa_offset 16
2968; RV64-NEXT:    csrr a1, vlenb
2969; RV64-NEXT:    slli a1, a1, 3
2970; RV64-NEXT:    sub sp, sp, a1
2971; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
2972; RV64-NEXT:    lui a1, 4080
2973; RV64-NEXT:    li a2, 255
2974; RV64-NEXT:    li a3, 56
2975; RV64-NEXT:    lui a4, 16
2976; RV64-NEXT:    li a5, 40
2977; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
2978; RV64-NEXT:    vsrl.vi v24, v8, 24
2979; RV64-NEXT:    addiw a0, a4, -256
2980; RV64-NEXT:    vsrl.vx v16, v8, a3
2981; RV64-NEXT:    vsrl.vx v0, v8, a5
2982; RV64-NEXT:    vand.vx v0, v0, a0
2983; RV64-NEXT:    vor.vv v16, v0, v16
2984; RV64-NEXT:    addi a4, sp, 16
2985; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
2986; RV64-NEXT:    vsrl.vi v0, v8, 8
2987; RV64-NEXT:    slli a2, a2, 24
2988; RV64-NEXT:    vand.vx v24, v24, a1
2989; RV64-NEXT:    vand.vx v0, v0, a2
2990; RV64-NEXT:    vor.vv v24, v0, v24
2991; RV64-NEXT:    vand.vx v0, v8, a1
2992; RV64-NEXT:    vsll.vi v0, v0, 24
2993; RV64-NEXT:    vand.vx v16, v8, a2
2994; RV64-NEXT:    vsll.vi v16, v16, 8
2995; RV64-NEXT:    vor.vv v0, v0, v16
2996; RV64-NEXT:    vsll.vx v16, v8, a3
2997; RV64-NEXT:    vand.vx v8, v8, a0
2998; RV64-NEXT:    vsll.vx v8, v8, a5
2999; RV64-NEXT:    vor.vv v8, v16, v8
3000; RV64-NEXT:    lui a0, 61681
3001; RV64-NEXT:    lui a1, 209715
3002; RV64-NEXT:    lui a2, 349525
3003; RV64-NEXT:    addiw a0, a0, -241
3004; RV64-NEXT:    addiw a1, a1, 819
3005; RV64-NEXT:    addiw a2, a2, 1365
3006; RV64-NEXT:    slli a3, a0, 32
3007; RV64-NEXT:    slli a4, a1, 32
3008; RV64-NEXT:    add a0, a0, a3
3009; RV64-NEXT:    slli a3, a2, 32
3010; RV64-NEXT:    add a1, a1, a4
3011; RV64-NEXT:    add a2, a2, a3
3012; RV64-NEXT:    addi a3, sp, 16
3013; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
3014; RV64-NEXT:    vor.vv v16, v24, v16
3015; RV64-NEXT:    vor.vv v8, v8, v0
3016; RV64-NEXT:    vor.vv v8, v8, v16
3017; RV64-NEXT:    vsrl.vi v16, v8, 4
3018; RV64-NEXT:    vand.vx v8, v8, a0
3019; RV64-NEXT:    vand.vx v16, v16, a0
3020; RV64-NEXT:    vsll.vi v8, v8, 4
3021; RV64-NEXT:    vor.vv v8, v16, v8
3022; RV64-NEXT:    vsrl.vi v16, v8, 2
3023; RV64-NEXT:    vand.vx v8, v8, a1
3024; RV64-NEXT:    vand.vx v16, v16, a1
3025; RV64-NEXT:    vsll.vi v8, v8, 2
3026; RV64-NEXT:    vor.vv v8, v16, v8
3027; RV64-NEXT:    vsrl.vi v16, v8, 1
3028; RV64-NEXT:    vand.vx v8, v8, a2
3029; RV64-NEXT:    vand.vx v16, v16, a2
3030; RV64-NEXT:    vadd.vv v8, v8, v8
3031; RV64-NEXT:    vor.vv v8, v16, v8
3032; RV64-NEXT:    csrr a0, vlenb
3033; RV64-NEXT:    slli a0, a0, 3
3034; RV64-NEXT:    add sp, sp, a0
3035; RV64-NEXT:    .cfi_def_cfa sp, 16
3036; RV64-NEXT:    addi sp, sp, 16
3037; RV64-NEXT:    .cfi_def_cfa_offset 0
3038; RV64-NEXT:    ret
3039;
3040; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked:
3041; CHECK-ZVBB:       # %bb.0:
3042; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
3043; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
3044; CHECK-ZVBB-NEXT:    ret
3045  %v = call <vscale x 8 x i64> @llvm.vp.bitreverse.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
3046  ret <vscale x 8 x i64> %v
3047}
3048
3049; Test splitting. Use the i16 version for easier checking.
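; nxv64i16 does not fit in a single LMUL=8 register group, so the operation is
; split into two nxv32i16 halves. The EVL for the high half (in v16) is
; max(evl - 4*vlenb, 0), computed by the sub/sltu/addi/and sequence, and its
; mask bits are produced by sliding v0 down by vlenb/2 bytes; the low half
; (in v8) then runs with the EVL clamped to at most 4*vlenb.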
3050declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i1>, i32)
3051
3052define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
3053; CHECK-LABEL: vp_bitreverse_nxv64i16:
3054; CHECK:       # %bb.0:
3055; CHECK-NEXT:    addi sp, sp, -16
3056; CHECK-NEXT:    .cfi_def_cfa_offset 16
3057; CHECK-NEXT:    csrr a1, vlenb
3058; CHECK-NEXT:    slli a1, a1, 4
3059; CHECK-NEXT:    sub sp, sp, a1
3060; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
3061; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
3062; CHECK-NEXT:    vmv1r.v v24, v0
3063; CHECK-NEXT:    csrr a1, vlenb
3064; CHECK-NEXT:    slli a1, a1, 3
3065; CHECK-NEXT:    add a1, sp, a1
3066; CHECK-NEXT:    addi a1, a1, 16
3067; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
3068; CHECK-NEXT:    csrr a3, vlenb
3069; CHECK-NEXT:    lui a1, 1
3070; CHECK-NEXT:    lui a2, 3
3071; CHECK-NEXT:    srli a4, a3, 1
3072; CHECK-NEXT:    slli a3, a3, 2
3073; CHECK-NEXT:    vslidedown.vx v0, v0, a4
3074; CHECK-NEXT:    sub a4, a0, a3
3075; CHECK-NEXT:    sltu a5, a0, a4
3076; CHECK-NEXT:    addi a5, a5, -1
3077; CHECK-NEXT:    and a5, a5, a4
3078; CHECK-NEXT:    lui a6, 5
3079; CHECK-NEXT:    addi a4, a1, -241
3080; CHECK-NEXT:    addi a2, a2, 819
3081; CHECK-NEXT:    addi a1, a6, 1365
3082; CHECK-NEXT:    vsetvli zero, a5, e16, m8, ta, ma
3083; CHECK-NEXT:    vsrl.vi v8, v16, 8, v0.t
3084; CHECK-NEXT:    vsll.vi v16, v16, 8, v0.t
3085; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3086; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
3087; CHECK-NEXT:    vand.vx v16, v16, a4, v0.t
3088; CHECK-NEXT:    vand.vx v8, v8, a4, v0.t
3089; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
3090; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3091; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
3092; CHECK-NEXT:    vand.vx v16, v16, a2, v0.t
3093; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
3094; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
3095; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3096; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
3097; CHECK-NEXT:    vand.vx v16, v16, a1, v0.t
3098; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
3099; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
3100; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3101; CHECK-NEXT:    addi a5, sp, 16
3102; CHECK-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
3103; CHECK-NEXT:    bltu a0, a3, .LBB46_2
3104; CHECK-NEXT:  # %bb.1:
3105; CHECK-NEXT:    mv a0, a3
3106; CHECK-NEXT:  .LBB46_2:
3107; CHECK-NEXT:    vmv1r.v v0, v24
3108; CHECK-NEXT:    csrr a3, vlenb
3109; CHECK-NEXT:    slli a3, a3, 3
3110; CHECK-NEXT:    add a3, sp, a3
3111; CHECK-NEXT:    addi a3, a3, 16
3112; CHECK-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
3113; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3114; CHECK-NEXT:    vsrl.vi v16, v8, 8, v0.t
3115; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
3116; CHECK-NEXT:    vor.vv v8, v8, v16, v0.t
3117; CHECK-NEXT:    vsrl.vi v16, v8, 4, v0.t
3118; CHECK-NEXT:    vand.vx v16, v16, a4, v0.t
3119; CHECK-NEXT:    vand.vx v8, v8, a4, v0.t
3120; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
3121; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3122; CHECK-NEXT:    vsrl.vi v16, v8, 2, v0.t
3123; CHECK-NEXT:    vand.vx v16, v16, a2, v0.t
3124; CHECK-NEXT:    vand.vx v8, v8, a2, v0.t
3125; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
3126; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3127; CHECK-NEXT:    vsrl.vi v16, v8, 1, v0.t
3128; CHECK-NEXT:    vand.vx v16, v16, a1, v0.t
3129; CHECK-NEXT:    vand.vx v8, v8, a1, v0.t
3130; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
3131; CHECK-NEXT:    vor.vv v8, v16, v8, v0.t
3132; CHECK-NEXT:    addi a0, sp, 16
3133; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
3134; CHECK-NEXT:    csrr a0, vlenb
3135; CHECK-NEXT:    slli a0, a0, 4
3136; CHECK-NEXT:    add sp, sp, a0
3137; CHECK-NEXT:    .cfi_def_cfa sp, 16
3138; CHECK-NEXT:    addi sp, sp, 16
3139; CHECK-NEXT:    .cfi_def_cfa_offset 0
3140; CHECK-NEXT:    ret
3141;
3142; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
3143; CHECK-ZVBB:       # %bb.0:
3144; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
3145; CHECK-ZVBB-NEXT:    vmv1r.v v24, v0
3146; CHECK-ZVBB-NEXT:    csrr a1, vlenb
3147; CHECK-ZVBB-NEXT:    srli a2, a1, 1
3148; CHECK-ZVBB-NEXT:    slli a1, a1, 2
3149; CHECK-ZVBB-NEXT:    vslidedown.vx v0, v0, a2
3150; CHECK-ZVBB-NEXT:    sub a2, a0, a1
3151; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
3152; CHECK-ZVBB-NEXT:    addi a3, a3, -1
3153; CHECK-ZVBB-NEXT:    and a2, a3, a2
3154; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
3155; CHECK-ZVBB-NEXT:    vbrev.v v16, v16, v0.t
3156; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB46_2
3157; CHECK-ZVBB-NEXT:  # %bb.1:
3158; CHECK-ZVBB-NEXT:    mv a0, a1
3159; CHECK-ZVBB-NEXT:  .LBB46_2:
3160; CHECK-ZVBB-NEXT:    vmv1r.v v0, v24
3161; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3162; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
3163; CHECK-ZVBB-NEXT:    ret
3164  %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
3165  ret <vscale x 64 x i16> %v
3166}
3167
3168define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
3169; CHECK-LABEL: vp_bitreverse_nxv64i16_unmasked:
3170; CHECK:       # %bb.0:
3171; CHECK-NEXT:    csrr a3, vlenb
3172; CHECK-NEXT:    lui a1, 1
3173; CHECK-NEXT:    lui a2, 3
3174; CHECK-NEXT:    slli a3, a3, 2
3175; CHECK-NEXT:    sub a4, a0, a3
3176; CHECK-NEXT:    sltu a5, a0, a4
3177; CHECK-NEXT:    addi a5, a5, -1
3178; CHECK-NEXT:    and a5, a5, a4
3179; CHECK-NEXT:    lui a6, 5
3180; CHECK-NEXT:    addi a4, a1, -241
3181; CHECK-NEXT:    addi a2, a2, 819
3182; CHECK-NEXT:    addi a1, a6, 1365
3183; CHECK-NEXT:    vsetvli zero, a5, e16, m8, ta, ma
3184; CHECK-NEXT:    vsrl.vi v24, v16, 8
3185; CHECK-NEXT:    vsll.vi v16, v16, 8
3186; CHECK-NEXT:    vor.vv v16, v16, v24
3187; CHECK-NEXT:    vsrl.vi v24, v16, 4
3188; CHECK-NEXT:    vand.vx v16, v16, a4
3189; CHECK-NEXT:    vand.vx v24, v24, a4
3190; CHECK-NEXT:    vsll.vi v16, v16, 4
3191; CHECK-NEXT:    vor.vv v16, v24, v16
3192; CHECK-NEXT:    vsrl.vi v24, v16, 2
3193; CHECK-NEXT:    vand.vx v16, v16, a2
3194; CHECK-NEXT:    vand.vx v24, v24, a2
3195; CHECK-NEXT:    vsll.vi v16, v16, 2
3196; CHECK-NEXT:    vor.vv v16, v24, v16
3197; CHECK-NEXT:    vsrl.vi v24, v16, 1
3198; CHECK-NEXT:    vand.vx v16, v16, a1
3199; CHECK-NEXT:    vand.vx v24, v24, a1
3200; CHECK-NEXT:    vadd.vv v16, v16, v16
3201; CHECK-NEXT:    vor.vv v16, v24, v16
3202; CHECK-NEXT:    bltu a0, a3, .LBB47_2
3203; CHECK-NEXT:  # %bb.1:
3204; CHECK-NEXT:    mv a0, a3
3205; CHECK-NEXT:  .LBB47_2:
3206; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3207; CHECK-NEXT:    vsrl.vi v24, v8, 8
3208; CHECK-NEXT:    vsll.vi v8, v8, 8
3209; CHECK-NEXT:    vor.vv v8, v8, v24
3210; CHECK-NEXT:    vsrl.vi v24, v8, 4
3211; CHECK-NEXT:    vand.vx v8, v8, a4
3212; CHECK-NEXT:    vand.vx v24, v24, a4
3213; CHECK-NEXT:    vsll.vi v8, v8, 4
3214; CHECK-NEXT:    vor.vv v8, v24, v8
3215; CHECK-NEXT:    vsrl.vi v24, v8, 2
3216; CHECK-NEXT:    vand.vx v8, v8, a2
3217; CHECK-NEXT:    vand.vx v24, v24, a2
3218; CHECK-NEXT:    vsll.vi v8, v8, 2
3219; CHECK-NEXT:    vor.vv v8, v24, v8
3220; CHECK-NEXT:    vsrl.vi v24, v8, 1
3221; CHECK-NEXT:    vand.vx v8, v8, a1
3222; CHECK-NEXT:    vand.vx v24, v24, a1
3223; CHECK-NEXT:    vadd.vv v8, v8, v8
3224; CHECK-NEXT:    vor.vv v8, v24, v8
3225; CHECK-NEXT:    ret
3226;
3227; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked:
3228; CHECK-ZVBB:       # %bb.0:
3229; CHECK-ZVBB-NEXT:    csrr a1, vlenb
3230; CHECK-ZVBB-NEXT:    slli a1, a1, 2
3231; CHECK-ZVBB-NEXT:    sub a2, a0, a1
3232; CHECK-ZVBB-NEXT:    sltu a3, a0, a2
3233; CHECK-ZVBB-NEXT:    addi a3, a3, -1
3234; CHECK-ZVBB-NEXT:    and a2, a3, a2
3235; CHECK-ZVBB-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
3236; CHECK-ZVBB-NEXT:    vbrev.v v16, v16
3237; CHECK-ZVBB-NEXT:    bltu a0, a1, .LBB47_2
3238; CHECK-ZVBB-NEXT:  # %bb.1:
3239; CHECK-ZVBB-NEXT:    mv a0, a1
3240; CHECK-ZVBB-NEXT:  .LBB47_2:
3241; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
3242; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
3243; CHECK-ZVBB-NEXT:    ret
3244  %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl)
3245  ret <vscale x 64 x i16> %v
3246}
3247
3248; Test promotion.
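; i9 is not a legal element type, so the operation is promoted to i16: the
; value is bit-reversed as an i16 and then shifted right by 7 so that the
; reversed 9 bits land back in the low bits of the element.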
3249declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
3250define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
3251; CHECK-LABEL: vp_bitreverse_nxv1i9:
3252; CHECK:       # %bb.0:
3253; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3254; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
3255; CHECK-NEXT:    lui a0, 1
3256; CHECK-NEXT:    vsll.vi v8, v8, 8, v0.t
3257; CHECK-NEXT:    addi a0, a0, -241
3258; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
3259; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
3260; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3261; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3262; CHECK-NEXT:    lui a0, 3
3263; CHECK-NEXT:    addi a0, a0, 819
3264; CHECK-NEXT:    vsll.vi v8, v8, 4, v0.t
3265; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
3266; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
3267; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3268; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3269; CHECK-NEXT:    lui a0, 5
3270; CHECK-NEXT:    addi a0, a0, 1365
3271; CHECK-NEXT:    vsll.vi v8, v8, 2, v0.t
3272; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
3273; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
3274; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
3275; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
3276; CHECK-NEXT:    vsll.vi v8, v8, 1, v0.t
3277; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
3278; CHECK-NEXT:    vsrl.vi v8, v8, 7, v0.t
3279; CHECK-NEXT:    ret
3280;
3281; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
3282; CHECK-ZVBB:       # %bb.0:
3283; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
3284; CHECK-ZVBB-NEXT:    vbrev.v v8, v8, v0.t
3285; CHECK-ZVBB-NEXT:    vsrl.vi v8, v8, 7, v0.t
3286; CHECK-ZVBB-NEXT:    ret
3287  %v = call <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
3288  ret <vscale x 1 x i9> %v
3289}
3290