; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
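
; These tests exercise lowering of the llvm.bitreverse.* intrinsic on
; scalable vector types. Without Zvbb the intrinsic is expanded into the
; classic shift-and-mask bit reversal; with +zvbb it becomes a single
; vbrev.v. As a rough C-style sketch of the 8-bit expansion checked below
; (illustration only, not code from this file; 51 = 0x33 and 85 = 0x55 are
; the constants the generated code materializes with li, and the final
; shift left by one is done as vadd.vv v8, v8, v8):
;
;   x = (x << 4) | (x >> 4);                   // swap nibbles
;   x = ((x & 0x33) << 2) | ((x >> 2) & 0x33); // swap bit pairs
;   x = ((x & 0x55) << 1) | ((x >> 1) & 0x55); // swap adjacent bits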

define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vsll.vi v9, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vsll.vi v10, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vsll.vi v12, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vsll.vi v16, v8, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv64i8:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)

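; For 16-bit elements the expansion first swaps the two bytes (vsrl/vsll by
; 8) and then applies the same nibble/pair/bit swaps with 16-bit masks:
; lui 1 + addi -241 materializes 0x0f0f, lui 3 + addi 819 gives 0x3333, and
; lui 5 + addi 1365 gives 0x5555.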
define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bitreverse_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv32i16:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)

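; For 32-bit elements the byte swap is open-coded from shifts by 8 and 24,
; with lui 16 + addi -256 producing the 0xff00 byte mask. The bit swaps then
; use the 32-bit masks 0x0f0f0f0f (lui 61681 + addi -241), 0x33333333
; (lui 209715 + addi 819) and 0x55555555 (lui 349525 + addi 1365).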
define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vsll.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vsll.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vsll.vi v16, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: bitreverse_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vsll.vi v24, v8, 24
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v24, v8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsll.vi v8, v8, 4
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv16i32:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)

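; For 64-bit elements the RV32 and RV64 lowerings diverge. RV32 cannot hold
; a 64-bit mask in a scalar register, so it splats the repeating 32-bit
; masks with vmv.v.x at SEW=32 and builds the byte-swap mask through a stack
; slot read back with vlse64.v, while RV64 materializes the 64-bit constants
; with lui/addiw/slli/add. The byte swap itself combines vsrl.vx/vsll.vx by
; 40 and 56 with the 8- and 24-bit shifts.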
define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bitreverse_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vlse64.v v13, (a5), zero
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a0
; RV32-NEXT:    vsll.vx v11, v11, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a4
; RV32-NEXT:    vand.vv v12, v12, v13
; RV32-NEXT:    vor.vv v9, v12, v9
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v12, v8, v13
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v11, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v9, v9, v12
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    vsrl.vx v11, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v11, v11, a2
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v9, v9, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vand.vx v10, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v11, v11, 24
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v9, v9, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv1i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bitreverse_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v18, v8, a1
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vlse64.v v14, (a5), zero
; RV32-NEXT:    vor.vv v12, v12, v10
; RV32-NEXT:    vand.vx v10, v8, a0
; RV32-NEXT:    vsll.vx v10, v10, a2
; RV32-NEXT:    vor.vv v10, v18, v10
; RV32-NEXT:    vsrl.vi v18, v8, 8
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vand.vv v18, v18, v14
; RV32-NEXT:    vor.vv v16, v18, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v14, v8, v14
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsll.vi v14, v14, 8
; RV32-NEXT:    vor.vv v8, v8, v14
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v14, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vand.vv v12, v12, v14
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    vsrl.vx v14, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v14, v14, 24
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv2i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bitreverse_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    vsrl.vx v16, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v28, v8, a1
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vlse64.v v20, (a5), zero
; RV32-NEXT:    vor.vv v16, v16, v12
; RV32-NEXT:    vand.vx v12, v8, a0
; RV32-NEXT:    vsll.vx v12, v12, a2
; RV32-NEXT:    vor.vv v12, v28, v12
; RV32-NEXT:    vsrl.vi v28, v8, 8
; RV32-NEXT:    vand.vx v24, v24, a4
; RV32-NEXT:    vand.vv v28, v28, v20
; RV32-NEXT:    vor.vv v24, v28, v24
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    vand.vv v20, v8, v20
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsll.vi v20, v20, 8
; RV32-NEXT:    vor.vv v8, v8, v20
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v20, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vand.vv v16, v16, v20
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    vsrl.vx v20, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v20, v20, a2
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v20, v20, a4
; RV64-NEXT:    vor.vv v20, v20, v16
; RV64-NEXT:    vand.vx v16, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vand.vx v20, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    vsll.vi v20, v20, 8
; RV64-NEXT:    vor.vv v16, v16, v20
; RV64-NEXT:    vsll.vx v20, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v12, v12, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv4i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)

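; At LMUL=8 the RV32 expansion needs more vector registers than are
; available, so it spills two m8 temporaries to the stack (vs8r.v/vl8r.v)
; and grows the frame by 16 * vlenb, as the .cfi_escape expression below
; records.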
define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bitreverse_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v0, v8, a1
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsll.vx v16, v16, a2
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a5), zero
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v24
; RV32-NEXT:    vand.vv v16, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 3
; RV32-NEXT:    add a3, sp, a3
; RV32-NEXT:    addi a3, a3, 16
; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsetvli a3, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a1
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a2
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a0, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    vsrl.vx v0, v8, a0
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v0, v0, a4
; RV64-NEXT:    vor.vv v0, v0, v24
; RV64-NEXT:    vand.vx v24, v8, a3
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vand.vx v0, v8, a4
; RV64-NEXT:    lui a4, 209715
; RV64-NEXT:    vsll.vi v24, v24, 24
; RV64-NEXT:    vsll.vi v0, v0, 8
; RV64-NEXT:    vor.vv v24, v24, v0
; RV64-NEXT:    vsll.vx v0, v8, a1
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 819
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    slli a2, a3, 32
; RV64-NEXT:    vsll.vx v8, v8, a0
; RV64-NEXT:    slli a0, a4, 32
; RV64-NEXT:    add a2, a3, a2
; RV64-NEXT:    slli a3, a1, 32
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vand.vx v16, v16, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: bitreverse_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vbrev.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
