; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVKB

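; Lowering of the llvm.bswap.* intrinsic on fixed-length vectors. Without
; Zvkb the swap is expanded with shifts, masks, and ors (a simple
; shift/shift/or for i16 elements, longer shift-and-mask sequences for i32
; and i64); with +zvkb every case selects a single vrev8.v.
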
define void @bswap_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v8i16:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVKB-NEXT:    vle16.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse16.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

define void @bswap_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsrl.vi v10, v8, 24
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vor.vv v9, v9, v10
; CHECK-NEXT:    vand.vx v10, v8, a1
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vsll.vi v10, v10, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v4i32:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVKB-NEXT:    vle32.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse32.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

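; For i64 elements the RV32 and RV64 expansions diverge: RV32 spills the
; 64-bit byte mask to the stack and splats it with vlse64.v, while RV64
; materializes the masks in scalar registers, so the two targets use
; separate check prefixes.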
define void @bswap_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: bswap_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    lui a4, 16
; RV32-NEXT:    lui a5, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a1, a4, -256
; RV32-NEXT:    vlse64.v v9, (a6), zero
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    vsrl.vx v11, v8, a3
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vsll.vx v13, v8, a2
; RV32-NEXT:    vand.vx v11, v11, a1
; RV32-NEXT:    vor.vv v10, v11, v10
; RV32-NEXT:    vand.vx v11, v8, a1
; RV32-NEXT:    vsll.vx v11, v11, a3
; RV32-NEXT:    vor.vv v11, v13, v11
; RV32-NEXT:    vsrl.vi v13, v8, 8
; RV32-NEXT:    vand.vx v12, v12, a5
; RV32-NEXT:    vand.vv v13, v13, v9
; RV32-NEXT:    vor.vv v12, v13, v12
; RV32-NEXT:    vand.vv v9, v8, v9
; RV32-NEXT:    vand.vx v8, v8, a5
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v9, v9, 8
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vor.vv v8, v11, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a2, 40
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    lui a4, 4080
; RV64-NEXT:    li a5, 255
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    slli a5, a5, 24
; RV64-NEXT:    vsrl.vx v9, v8, a1
; RV64-NEXT:    vsrl.vx v10, v8, a2
; RV64-NEXT:    vsrl.vi v11, v8, 24
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vand.vx v10, v8, a5
; RV64-NEXT:    vand.vx v11, v11, a4
; RV64-NEXT:    vand.vx v12, v12, a5
; RV64-NEXT:    vor.vv v11, v12, v11
; RV64-NEXT:    vand.vx v12, v8, a4
; RV64-NEXT:    vsll.vi v10, v10, 8
; RV64-NEXT:    vsll.vi v12, v12, 24
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vx v12, v8, a1
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vsll.vx v8, v8, a2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vor.vv v9, v11, v9
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
;
; ZVKB-LABEL: bswap_v2i64:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVKB-NEXT:    vle64.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse64.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

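; The remaining tests repeat the same element types with twice as many
; elements, so the vectors occupy LMUL=2 register groups (m2); the lowering
; strategies are otherwise unchanged.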
define void @bswap_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v16i16:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVKB-NEXT:    vle16.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse16.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)

define void @bswap_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: bswap_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 16
; CHECK-NEXT:    addi a1, a1, -256
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsrl.vi v12, v8, 24
; CHECK-NEXT:    vand.vx v10, v10, a1
; CHECK-NEXT:    vor.vv v10, v10, v12
; CHECK-NEXT:    vand.vx v12, v8, a1
; CHECK-NEXT:    vsll.vi v8, v8, 24
; CHECK-NEXT:    vsll.vi v12, v12, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVKB-LABEL: bswap_v8i32:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVKB-NEXT:    vle32.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse32.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)

define void @bswap_v4i64(ptr %x, ptr %y) {
; RV32-LABEL: bswap_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 1044480
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    lui a4, 16
; RV32-NEXT:    lui a5, 4080
; RV32-NEXT:    addi a6, sp, 8
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a1, a4, -256
; RV32-NEXT:    vlse64.v v10, (a6), zero
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    vsrl.vx v14, v8, a3
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vsll.vx v18, v8, a2
; RV32-NEXT:    vand.vx v14, v14, a1
; RV32-NEXT:    vor.vv v12, v14, v12
; RV32-NEXT:    vand.vx v14, v8, a1
; RV32-NEXT:    vsll.vx v14, v14, a3
; RV32-NEXT:    vor.vv v14, v18, v14
; RV32-NEXT:    vsrl.vi v18, v8, 8
; RV32-NEXT:    vand.vx v16, v16, a5
; RV32-NEXT:    vand.vv v18, v18, v10
; RV32-NEXT:    vor.vv v16, v18, v16
; RV32-NEXT:    vand.vv v10, v8, v10
; RV32-NEXT:    vand.vx v8, v8, a5
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vsll.vi v10, v10, 8
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v14, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    li a2, 40
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    lui a4, 4080
; RV64-NEXT:    li a5, 255
; RV64-NEXT:    addiw a3, a3, -256
; RV64-NEXT:    slli a5, a5, 24
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    vsrl.vx v12, v8, a2
; RV64-NEXT:    vsrl.vi v14, v8, 24
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vand.vx v12, v8, a5
; RV64-NEXT:    vand.vx v14, v14, a4
; RV64-NEXT:    vand.vx v16, v16, a5
; RV64-NEXT:    vor.vv v14, v16, v14
; RV64-NEXT:    vand.vx v16, v8, a4
; RV64-NEXT:    vsll.vi v12, v12, 8
; RV64-NEXT:    vsll.vi v16, v16, 24
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsll.vx v16, v8, a1
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vsll.vx v8, v8, a2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vor.vv v10, v14, v10
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
;
; ZVKB-LABEL: bswap_v4i64:
; ZVKB:       # %bb.0:
; ZVKB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVKB-NEXT:    vle64.v v8, (a0)
; ZVKB-NEXT:    vrev8.v v8, v8
; ZVKB-NEXT:    vse64.v v8, (a0)
; ZVKB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)