; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s

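; Check that a rotate of an XOR (expressed as a funnel shift with identical
; operands) is combined into the SVE2 XAR instruction, and that plain SVE
; falls back to EOR followed by shifts and an ORR.
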
define <vscale x 2 x i64> @xar_nxv2i64_l(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.d, z0.d, #4
; SVE-NEXT:    lsl z0.d, z0.d, #60
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv2i64_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 2 x i64> %x, %y
    %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
    ret <vscale x 2 x i64> %b
}

define <vscale x 2 x i64> @xar_nxv2i64_r(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.d, z0.d, #60
; SVE-NEXT:    lsr z0.d, z0.d, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv2i64_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 2 x i64> %x, %y
    %b = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 4))
    ret <vscale x 2 x i64> %b
}

define <vscale x 4 x i32> @xar_nxv4i32_l(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; SVE-LABEL: xar_nxv4i32_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.s, z0.s, #4
; SVE-NEXT:    lsl z0.s, z0.s, #28
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv4i32_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.s, z0.s, z1.s, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 4 x i32> %x, %y
    %b = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 28))
    ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i32> @xar_nxv4i32_r(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; SVE-LABEL: xar_nxv4i32_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.s, z0.s, #28
; SVE-NEXT:    lsr z0.s, z0.s, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv4i32_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.s, z0.s, z1.s, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 4 x i32> %x, %y
    %b = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 4))
    ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @xar_nxv8i16_l(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; SVE-LABEL: xar_nxv8i16_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.h, z0.h, #4
; SVE-NEXT:    lsl z0.h, z0.h, #12
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv8i16_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.h, z0.h, z1.h, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 8 x i16> %x, %y
    %b = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 12))
    ret <vscale x 8 x i16> %b
}

define <vscale x 8 x i16> @xar_nxv8i16_r(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; SVE-LABEL: xar_nxv8i16_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.h, z0.h, #12
; SVE-NEXT:    lsr z0.h, z0.h, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv8i16_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.h, z0.h, z1.h, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 8 x i16> %x, %y
    %b = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 4))
    ret <vscale x 8 x i16> %b
}

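; For 8-bit elements a rotate left by 4 is the same as a rotate right by 4,
; so both the fshl and fshr tests below expect XAR with immediate #4.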
define <vscale x 16 x i8> @xar_nxv16i8_l(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; SVE-LABEL: xar_nxv16i8_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.b, z0.b, #4
; SVE-NEXT:    lsl z0.b, z0.b, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv16i8_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.b, z0.b, z1.b, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 16 x i8> %x, %y
    %b = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 4))
    ret <vscale x 16 x i8> %b
}

define <vscale x 16 x i8> @xar_nxv16i8_r(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; SVE-LABEL: xar_nxv16i8_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.b, z0.b, #4
; SVE-NEXT:    lsr z0.b, z0.b, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv16i8_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.b, z0.b, z1.b, #4
; SVE2-NEXT:    ret
    %a = xor <vscale x 16 x i8> %x, %y
    %b = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 4))
    ret <vscale x 16 x i8> %b
}

; Negative test: the rotate amount is not a constant, so XAR cannot be used.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
; CHECK-LABEL: xar_nxv2i64_l_neg1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    subr z2.d, z2.d, #0 // =0x0
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z3.d, z3.d, #0x3f
; CHECK-NEXT:    and z2.d, z2.d, #0x3f
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    lsl z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    orr z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
    %a = xor <vscale x 2 x i64> %x, %y
    %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %z)
    ret <vscale x 2 x i64> %b
}

; Negative test: OR instead of an XOR, so XAR does not apply.
; TODO: We could use the usra instruction here for SVE2.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_l_neg2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    lsr z1.d, z0.d, #4
; CHECK-NEXT:    lsl z0.d, z0.d, #60
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
    %a = or <vscale x 2 x i64> %x, %y
    %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
    ret <vscale x 2 x i64> %b
}

; The rotate amount of 64 is 0 modulo the element size, so only the EOR remains.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_l_neg3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
    %a = xor <vscale x 2 x i64> %x, %y
    %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 64))
    ret <vscale x 2 x i64> %b
}

; The same rotate expressed with individual shifts instead of a funnel-shift
; intrinsic; just one test.
define <vscale x 2 x i64> @xar_nxv2i64_shifts(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_shifts:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.d, z0.d, #4
; SVE-NEXT:    lsl z0.d, z0.d, #60
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv2i64_shifts:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT:    ret
    %xor = xor <vscale x 2 x i64> %x, %y
    %shl = shl <vscale x 2 x i64> %xor, splat (i64 60)
    %shr = lshr <vscale x 2 x i64> %xor, splat (i64 4)
    %or = or <vscale x 2 x i64> %shl, %shr
    ret <vscale x 2 x i64> %or
}

; Not a rotate operation, since the shift amounts 60 and 3 do not add up to 64.
define <vscale x 2 x i64> @xar_nxv2i64_shifts_neg(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_shifts_neg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    lsl z1.d, z0.d, #60
; CHECK-NEXT:    lsr z0.d, z0.d, #3
; CHECK-NEXT:    orr z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
    %xor = xor <vscale x 2 x i64> %x, %y
    %shl = shl <vscale x 2 x i64> %xor, splat (i64 60)
    %shr = lshr <vscale x 2 x i64> %xor, splat (i64 3)
    %or = or <vscale x 2 x i64> %shl, %shr
    ret <vscale x 2 x i64> %or
}

declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)