; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB

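; These tests cover lowering of the llvm.bswap.* intrinsic for scalable
; vector types. With the base V extension, bswap is expanded into vector
; shift/mask/or sequences; with Zvkb it selects a single vrev8.v, which
; reverses the bytes within each element. For i16 elements the expansion
; is just (x >> 8) | (x << 8).
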
define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bswap_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bswap_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bswap_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bswap_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bswap_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bswap_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bswap_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bswap_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bswap_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bswap_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv32i16:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)

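; For i32 elements the expansion needs the 0xff00 mask (materialized with
; lui/addi) to isolate the two middle bytes:
;   ((x >> 8) & 0xff00) | (x >> 24) | ((x & 0xff00) << 8) | (x << 24)
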
define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-LABEL: bswap_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-LABEL: bswap_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-LABEL: bswap_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsll.vi v12, v12, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-LABEL: bswap_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v16, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vor.vv v12, v12, v16
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsll.vi v16, v16, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
; CHECK-LABEL: bswap_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    vsrl.vi v24, v8, 24
; CHECK-NEXT:    addi a0, a0, -256
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vor.vv v16, v16, v24
; CHECK-NEXT:    vand.vx v24, v8, a0
; CHECK-NEXT:    vsll.vi v24, v24, 8
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vor.vv v8, v8, v24
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv16i32:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)

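; For i64 elements the inner byte masks no longer fit in immediates. RV64
; builds them in scalar registers; RV32 has no 64-bit scalar registers, so
; the 0xff000000 doubleword mask is assembled on the stack and broadcast
; into a vector with a zero-stride vlse64.v. The two outermost byte pairs
; are handled with vsrl.vx/vsll.vx by 56 and 40.
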
define <vscale x 1 x i64> @bswap_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bswap_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    vsrl.vx v11, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v12, v8, a1
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vlse64.v v13, (a5), zero
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a0
; RV32-NEXT:    vsll.vx v11, v11, a2
; RV32-NEXT:    vor.vv v11, v12, v11
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vx v9, v9, a4
; RV32-NEXT:    vand.vv v12, v12, v13
; RV32-NEXT:    vor.vv v9, v12, v9
; RV32-NEXT:    vand.vv v12, v8, v13
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v12, v12, 8
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vsrl.vx v11, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v11, v11, a2
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v9, v9, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vand.vx v11, v8, a3
; RV64-NEXT:    vsll.vi v11, v11, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vand.vx v10, v8, a4
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv1i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bswap_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bswap_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    vsrl.vx v14, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v16, v8, a1
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vlse64.v v18, (a5), zero
; RV32-NEXT:    vor.vv v12, v14, v12
; RV32-NEXT:    vand.vx v14, v8, a0
; RV32-NEXT:    vsll.vx v14, v14, a2
; RV32-NEXT:    vor.vv v14, v16, v14
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vx v10, v10, a4
; RV32-NEXT:    vand.vv v16, v16, v18
; RV32-NEXT:    vor.vv v10, v16, v10
; RV32-NEXT:    vand.vv v16, v8, v18
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v14, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    vsrl.vx v14, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v14, v14, a2
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vand.vx v14, v8, a3
; RV64-NEXT:    vsll.vi v14, v14, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv2i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bswap_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bswap_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    vsetvli a4, zero, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v20, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v24, v8, a1
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vlse64.v v28, (a5), zero
; RV32-NEXT:    vor.vv v16, v20, v16
; RV32-NEXT:    vand.vx v20, v8, a0
; RV32-NEXT:    vsll.vx v20, v20, a2
; RV32-NEXT:    vor.vv v20, v24, v20
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vx v12, v12, a4
; RV32-NEXT:    vand.vv v24, v24, v28
; RV32-NEXT:    vor.vv v12, v24, v12
; RV32-NEXT:    vand.vv v24, v8, v28
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v24, v24, 8
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v20, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vsrl.vx v20, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v20, v20, a2
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v20, v20, a4
; RV64-NEXT:    vor.vv v12, v20, v12
; RV64-NEXT:    vand.vx v20, v8, a3
; RV64-NEXT:    vsll.vi v20, v20, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vand.vx v16, v8, a4
; RV64-NEXT:    vsll.vi v16, v16, 8
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv4i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)

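; At LMUL=8 the RV32 expansion runs out of vector registers, so two m8
; temporaries are spilled to the stack; the prologue reserves 16 * vlenb
; bytes of stack for the two register groups.
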
define <vscale x 8 x i64> @bswap_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bswap_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    lui a4, 4080
; RV32-NEXT:    addi a5, sp, 8
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vsrl.vx v24, v8, a2
; RV32-NEXT:    addi a0, a3, -256
; RV32-NEXT:    vsll.vx v0, v8, a1
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsll.vx v16, v16, a2
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a5), zero
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vand.vx v16, v16, a4
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v16, v24
; RV32-NEXT:    vand.vv v16, v8, v0
; RV32-NEXT:    vand.vx v8, v8, a4
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v16, v16, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a3, 4080
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vsrl.vx v0, v8, a1
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v0, v0, a2
; RV64-NEXT:    vor.vv v16, v0, v16
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a4, 255
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    slli a4, a4, 24
; RV64-NEXT:    vand.vx v0, v0, a4
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vand.vx v0, v8, a3
; RV64-NEXT:    vsll.vi v0, v0, 24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vand.vx v24, v8, a4
; RV64-NEXT:    vsll.vi v24, v24, 8
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    ret
;
; CHECK-ZVKB-LABEL: bswap_nxv8i64:
; CHECK-ZVKB:       # %bb.0:
; CHECK-ZVKB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVKB-NEXT:    vrev8.v v8, v8
; CHECK-ZVKB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)