xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll (revision ab7110bcd6b137803935508de8c9f6af377f9454)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5
6target triple = "aarch64-unknown-linux-gnu"
7
;
; AND
;
; Each AND test is checked at NEON sizes (64/128-bit vectors, which must not
; use SVE) and at fixed-length sizes of 256 bits and up, which are lowered to
; predicated SVE ops with an explicit vector length (ptrue pN.T, vlN).

; Don't use SVE for 64-bit vectors.
define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @and_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = and <32 x i8> %op1, %op2
  store <32 x i8> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @and_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %res = and <64 x i8> %op1, %op2
  store <64 x i8> %res, ptr %a
  ret void
}

define void @and_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %res = and <128 x i8> %op1, %op2
  store <128 x i8> %res, ptr %a
  ret void
}

define void @and_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %res = and <256 x i8> %op1, %op2
  store <256 x i8> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @and_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = and <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @and_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %res = and <32 x i16> %op1, %op2
  store <32 x i16> %res, ptr %a
  ret void
}

define void @and_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %res = and <64 x i16> %op1, %op2
  store <64 x i16> %res, ptr %a
  ret void
}

define void @and_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %res = and <128 x i16> %op1, %op2
  store <128 x i16> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @and_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = and <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @and_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = and <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

define void @and_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %res = and <32 x i32> %op1, %op2
  store <32 x i32> %res, ptr %a
  ret void
}

define void @and_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %res = and <64 x i32> %op1, %op2
  store <64 x i32> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @and_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = and <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @and_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %res = and <8 x i64> %op1, %op2
  store <8 x i64> %res, ptr %a
  ret void
}

define void @and_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %res = and <16 x i64> %op1, %op2
  store <16 x i64> %res, ptr %a
  ret void
}

define void @and_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %res = and <32 x i64> %op1, %op2
  store <32 x i64> %res, ptr %a
  ret void
}
403
;
; OR
;
; Same matrix of vector widths as the AND tests above, for bitwise OR
; (NEON orr / SVE orr).

; Don't use SVE for 64-bit vectors.
define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @or_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = or <32 x i8> %op1, %op2
  store <32 x i8> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @or_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %res = or <64 x i8> %op1, %op2
  store <64 x i8> %res, ptr %a
  ret void
}

define void @or_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %res = or <128 x i8> %op1, %op2
  store <128 x i8> %res, ptr %a
  ret void
}

define void @or_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %res = or <256 x i8> %op1, %op2
  store <256 x i8> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @or_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = or <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @or_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %res = or <32 x i16> %op1, %op2
  store <32 x i16> %res, ptr %a
  ret void
}

define void @or_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %res = or <64 x i16> %op1, %op2
  store <64 x i16> %res, ptr %a
  ret void
}

define void @or_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %res = or <128 x i16> %op1, %op2
  store <128 x i16> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @or_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = or <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @or_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = or <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

define void @or_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %res = or <32 x i32> %op1, %op2
  store <32 x i32> %res, ptr %a
  ret void
}

define void @or_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %res = or <64 x i32> %op1, %op2
  store <64 x i32> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @or_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = or <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}

; Wider than one 256-bit register: split into two ops for VBITS_GE_256, a
; single op for VBITS_GE_512.
define void @or_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %res = or <8 x i64> %op1, %op2
  store <8 x i64> %res, ptr %a
  ret void
}

define void @or_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %res = or <16 x i64> %op1, %op2
  store <16 x i64> %res, ptr %a
  ret void
}

define void @or_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %res = or <32 x i64> %op1, %op2
  store <32 x i64> %res, ptr %a
  ret void
}
799
800;
801; XOR
802;
803
804; Don't use SVE for 64-bit vectors.
805define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
806; CHECK-LABEL: xor_v8i8:
807; CHECK:       // %bb.0:
808; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
809; CHECK-NEXT:    ret
810  %res = xor <8 x i8> %op1, %op2
811  ret <8 x i8> %res
812}
813
814; Don't use SVE for 128-bit vectors.
815define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
816; CHECK-LABEL: xor_v16i8:
817; CHECK:       // %bb.0:
818; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
819; CHECK-NEXT:    ret
820  %res = xor <16 x i8> %op1, %op2
821  ret <16 x i8> %res
822}
823
824define void @xor_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
825; CHECK-LABEL: xor_v32i8:
826; CHECK:       // %bb.0:
827; CHECK-NEXT:    ptrue p0.b, vl32
828; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
829; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
830; CHECK-NEXT:    eor z0.d, z0.d, z1.d
831; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
832; CHECK-NEXT:    ret
833  %op1 = load <32 x i8>, ptr %a
834  %op2 = load <32 x i8>, ptr %b
835  %res = xor <32 x i8> %op1, %op2
836  store <32 x i8> %res, ptr %a
837  ret void
838}
839
840define void @xor_v64i8(ptr %a, ptr %b) #0 {
841; VBITS_GE_256-LABEL: xor_v64i8:
842; VBITS_GE_256:       // %bb.0:
843; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
844; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
845; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
846; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
847; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
848; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
849; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
850; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
851; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
852; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
853; VBITS_GE_256-NEXT:    ret
854;
855; VBITS_GE_512-LABEL: xor_v64i8:
856; VBITS_GE_512:       // %bb.0:
857; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
858; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
859; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
860; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
861; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
862; VBITS_GE_512-NEXT:    ret
863  %op1 = load <64 x i8>, ptr %a
864  %op2 = load <64 x i8>, ptr %b
865  %res = xor <64 x i8> %op1, %op2
866  store <64 x i8> %res, ptr %a
867  ret void
868}
869
870define void @xor_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
871; CHECK-LABEL: xor_v128i8:
872; CHECK:       // %bb.0:
873; CHECK-NEXT:    ptrue p0.b, vl128
874; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
875; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
876; CHECK-NEXT:    eor z0.d, z0.d, z1.d
877; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
878; CHECK-NEXT:    ret
879  %op1 = load <128 x i8>, ptr %a
880  %op2 = load <128 x i8>, ptr %b
881  %res = xor <128 x i8> %op1, %op2
882  store <128 x i8> %res, ptr %a
883  ret void
884}
885
886define void @xor_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
887; CHECK-LABEL: xor_v256i8:
888; CHECK:       // %bb.0:
889; CHECK-NEXT:    ptrue p0.b, vl256
890; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
891; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
892; CHECK-NEXT:    eor z0.d, z0.d, z1.d
893; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
894; CHECK-NEXT:    ret
895  %op1 = load <256 x i8>, ptr %a
896  %op2 = load <256 x i8>, ptr %b
897  %res = xor <256 x i8> %op1, %op2
898  store <256 x i8> %res, ptr %a
899  ret void
900}
901
902; Don't use SVE for 64-bit vectors.
903define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
904; CHECK-LABEL: xor_v4i16:
905; CHECK:       // %bb.0:
906; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
907; CHECK-NEXT:    ret
908  %res = xor <4 x i16> %op1, %op2
909  ret <4 x i16> %res
910}
911
912; Don't use SVE for 128-bit vectors.
913define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
914; CHECK-LABEL: xor_v8i16:
915; CHECK:       // %bb.0:
916; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
917; CHECK-NEXT:    ret
918  %res = xor <8 x i16> %op1, %op2
919  ret <8 x i16> %res
920}
921
; xor <16 x i16>: 256-bit vector, guaranteed to fit in one SVE register by
; vscale_range(2,0); expect a single predicated EOR (ptrue p0.h, vl16).
define void @xor_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = xor <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}
937
; xor <32 x i16>: a 512-bit operation. At 256-bit SVE it is split into two
; halves, the upper addressed via the x8 element offset; at >=512-bit a single
; predicated EOR suffices.
define void @xor_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %res = xor <32 x i16> %op1, %op2
  store <32 x i16> %res, ptr %a
  ret void
}
967
; xor <64 x i16>: 1024-bit vector; with vscale_range(8,0) it fits in one SVE
; register, so expect a single predicated EOR (ptrue p0.h, vl64).
define void @xor_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %res = xor <64 x i16> %op1, %op2
  store <64 x i16> %res, ptr %a
  ret void
}
983
; xor <128 x i16>: 2048-bit vector; with vscale_range(16,0) it fits in one SVE
; register, so expect a single predicated EOR (ptrue p0.h, vl128).
define void @xor_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %res = xor <128 x i16> %op1, %op2
  store <128 x i16> %res, ptr %a
  ret void
}
999
1000; Don't use SVE for 64-bit vectors.
; Expect the NEON 64-bit (d-register, .8b) EOR rather than any SVE sequence.
define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = xor <2 x i32> %op1, %op2
  ret <2 x i32> %res
}
1009
1010; Don't use SVE for 128-bit vectors.
; Expect the NEON 128-bit (q-register, .16b) EOR rather than any SVE sequence.
define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = xor <4 x i32> %op1, %op2
  ret <4 x i32> %res
}
1019
; xor <8 x i32>: 256-bit vector, guaranteed to fit in one SVE register by
; vscale_range(2,0); expect a single predicated EOR (ptrue p0.s, vl8).
define void @xor_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = xor <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}
1035
; xor <16 x i32>: a 512-bit operation. At 256-bit SVE it is split into two
; halves, the upper addressed via the x8 element offset; at >=512-bit a single
; predicated EOR suffices.
define void @xor_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = xor <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}
1065
; xor <32 x i32>: 1024-bit vector; with vscale_range(8,0) it fits in one SVE
; register, so expect a single predicated EOR (ptrue p0.s, vl32).
define void @xor_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %res = xor <32 x i32> %op1, %op2
  store <32 x i32> %res, ptr %a
  ret void
}
1081
; xor <64 x i32>: 2048-bit vector; with vscale_range(16,0) it fits in one SVE
; register, so expect a single predicated EOR (ptrue p0.s, vl64).
define void @xor_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %res = xor <64 x i32> %op1, %op2
  store <64 x i32> %res, ptr %a
  ret void
}
1097
1098; Don't use SVE for 64-bit vectors.
; Expect the NEON 64-bit (d-register, .8b) EOR rather than any SVE sequence.
define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = xor <1 x i64> %op1, %op2
  ret <1 x i64> %res
}
1107
1108; Don't use SVE for 128-bit vectors.
; Expect the NEON 128-bit (q-register, .16b) EOR rather than any SVE sequence.
define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = xor <2 x i64> %op1, %op2
  ret <2 x i64> %res
}
1117
; xor <4 x i64>: 256-bit vector, guaranteed to fit in one SVE register by
; vscale_range(2,0); expect a single predicated EOR (ptrue p0.d, vl4).
define void @xor_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = xor <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}
1133
; xor <8 x i64>: a 512-bit operation. At 256-bit SVE it is split into two
; halves, the upper addressed via the x8 element offset; at >=512-bit a single
; predicated EOR suffices.
define void @xor_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %res = xor <8 x i64> %op1, %op2
  store <8 x i64> %res, ptr %a
  ret void
}
1163
; xor <16 x i64>: 1024-bit vector; with vscale_range(8,0) it fits in one SVE
; register, so expect a single predicated EOR (ptrue p0.d, vl16).
define void @xor_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %res = xor <16 x i64> %op1, %op2
  store <16 x i64> %res, ptr %a
  ret void
}
1179
; xor <32 x i64>: 2048-bit vector; with vscale_range(16,0) it fits in one SVE
; register, so expect a single predicated EOR (ptrue p0.d, vl32).
define void @xor_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %res = xor <32 x i64> %op1, %op2
  store <32 x i64> %res, ptr %a
  ret void
}
1195
; Every test function requires +sve so fixed-length vectors can lower to SVE.
attributes #0 = { "target-features"="+sve" }
1197