xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-int-immediates.ll (revision 4b051b4248bb6f9971dd1cf87fe311ebe9be917e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s
3
4target triple = "aarch64-unknown-linux-gnu"
5
6; Although SVE immediate packing should be fully tested using scalable vectors,
7; these tests protect against the possibility that scalable nodes, resulting
8; from lowering fixed length vector operations, trigger different isel patterns.
9
10;
11; ADD
12;
13
; add <64 x i8> with a splat of 7: expected to select the immediate form `add z0.b, z0.b, #7`.
14define void @add_v64i8(ptr %a) #0 {
15; CHECK-LABEL: add_v64i8:
16; CHECK:       // %bb.0:
17; CHECK-NEXT:    ptrue p0.b, vl64
18; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
19; CHECK-NEXT:    add z0.b, z0.b, #7 // =0x7
20; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
21; CHECK-NEXT:    ret
22  %op1 = load <64 x i8>, ptr %a
23  %ins = insertelement <64 x i8> poison, i8 7, i64 0
24  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
25  %res = add <64 x i8> %op1, %op2
26  store <64 x i8> %res, ptr %a
27  ret void
28}
29
; add <32 x i16> with a splat of 15: expected to select the immediate form `add z0.h, z0.h, #15`.
30define void @add_v32i16(ptr %a) #0 {
31; CHECK-LABEL: add_v32i16:
32; CHECK:       // %bb.0:
33; CHECK-NEXT:    ptrue p0.h, vl32
34; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
35; CHECK-NEXT:    add z0.h, z0.h, #15 // =0xf
36; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
37; CHECK-NEXT:    ret
38  %op1 = load <32 x i16>, ptr %a
39  %ins = insertelement <32 x i16> poison, i16 15, i64 0
40  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
41  %res = add <32 x i16> %op1, %op2
42  store <32 x i16> %res, ptr %a
43  ret void
44}
45
; add <16 x i32> with a splat of 31: expected to select the immediate form `add z0.s, z0.s, #31`.
46define void @add_v16i32(ptr %a) #0 {
47; CHECK-LABEL: add_v16i32:
48; CHECK:       // %bb.0:
49; CHECK-NEXT:    ptrue p0.s, vl16
50; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
51; CHECK-NEXT:    add z0.s, z0.s, #31 // =0x1f
52; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
53; CHECK-NEXT:    ret
54  %op1 = load <16 x i32>, ptr %a
55  %ins = insertelement <16 x i32> poison, i32 31, i64 0
56  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
57  %res = add <16 x i32> %op1, %op2
58  store <16 x i32> %res, ptr %a
59  ret void
60}
61
; add <8 x i64> with a splat of 63: expected to select the immediate form `add z0.d, z0.d, #63`.
62define void @add_v8i64(ptr %a) #0 {
63; CHECK-LABEL: add_v8i64:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    ptrue p0.d, vl8
66; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
67; CHECK-NEXT:    add z0.d, z0.d, #63 // =0x3f
68; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
69; CHECK-NEXT:    ret
70  %op1 = load <8 x i64>, ptr %a
71  %ins = insertelement <8 x i64> poison, i64 63, i64 0
72  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
73  %res = add <8 x i64> %op1, %op2
74  store <8 x i64> %res, ptr %a
75  ret void
76}
77
78;
79; AND
80;
81
; and <64 x i8> with a splat of 7: expected to select the immediate form `and z0.b, z0.b, #0x7`.
82define void @and_v64i8(ptr %a) #0 {
83; CHECK-LABEL: and_v64i8:
84; CHECK:       // %bb.0:
85; CHECK-NEXT:    ptrue p0.b, vl64
86; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
87; CHECK-NEXT:    and z0.b, z0.b, #0x7
88; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
89; CHECK-NEXT:    ret
90  %op1 = load <64 x i8>, ptr %a
91  %ins = insertelement <64 x i8> poison, i8 7, i64 0
92  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
93  %res = and <64 x i8> %op1, %op2
94  store <64 x i8> %res, ptr %a
95  ret void
96}
97
; and <32 x i16> with a splat of 15: expected to select the immediate form `and z0.h, z0.h, #0xf`.
98define void @and_v32i16(ptr %a) #0 {
99; CHECK-LABEL: and_v32i16:
100; CHECK:       // %bb.0:
101; CHECK-NEXT:    ptrue p0.h, vl32
102; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
103; CHECK-NEXT:    and z0.h, z0.h, #0xf
104; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
105; CHECK-NEXT:    ret
106  %op1 = load <32 x i16>, ptr %a
107  %ins = insertelement <32 x i16> poison, i16 15, i64 0
108  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
109  %res = and <32 x i16> %op1, %op2
110  store <32 x i16> %res, ptr %a
111  ret void
112}
113
; and <16 x i32> with a splat of 31: expected to select the immediate form `and z0.s, z0.s, #0x1f`.
114define void @and_v16i32(ptr %a) #0 {
115; CHECK-LABEL: and_v16i32:
116; CHECK:       // %bb.0:
117; CHECK-NEXT:    ptrue p0.s, vl16
118; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
119; CHECK-NEXT:    and z0.s, z0.s, #0x1f
120; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
121; CHECK-NEXT:    ret
122  %op1 = load <16 x i32>, ptr %a
123  %ins = insertelement <16 x i32> poison, i32 31, i64 0
124  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
125  %res = and <16 x i32> %op1, %op2
126  store <16 x i32> %res, ptr %a
127  ret void
128}
129
; and <8 x i64> with a splat of 63: expected to select the immediate form `and z0.d, z0.d, #0x3f`.
130define void @and_v8i64(ptr %a) #0 {
131; CHECK-LABEL: and_v8i64:
132; CHECK:       // %bb.0:
133; CHECK-NEXT:    ptrue p0.d, vl8
134; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
135; CHECK-NEXT:    and z0.d, z0.d, #0x3f
136; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
137; CHECK-NEXT:    ret
138  %op1 = load <8 x i64>, ptr %a
139  %ins = insertelement <8 x i64> poison, i64 63, i64 0
140  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
141  %res = and <8 x i64> %op1, %op2
142  store <8 x i64> %res, ptr %a
143  ret void
144}
145
146;
147; ASHR
148;
149
; ashr <64 x i8> by a splat of 7: expected to select the immediate form `asr z0.b, z0.b, #7`.
150define void @ashr_v64i8(ptr %a) #0 {
151; CHECK-LABEL: ashr_v64i8:
152; CHECK:       // %bb.0:
153; CHECK-NEXT:    ptrue p0.b, vl64
154; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
155; CHECK-NEXT:    asr z0.b, z0.b, #7
156; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
157; CHECK-NEXT:    ret
158  %op1 = load <64 x i8>, ptr %a
159  %ins = insertelement <64 x i8> poison, i8 7, i64 0
160  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
161  %res = ashr <64 x i8> %op1, %op2
162  store <64 x i8> %res, ptr %a
163  ret void
164}
165
; ashr <32 x i16> by a splat of 15: expected to select the immediate form `asr z0.h, z0.h, #15`.
166define void @ashr_v32i16(ptr %a) #0 {
167; CHECK-LABEL: ashr_v32i16:
168; CHECK:       // %bb.0:
169; CHECK-NEXT:    ptrue p0.h, vl32
170; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
171; CHECK-NEXT:    asr z0.h, z0.h, #15
172; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
173; CHECK-NEXT:    ret
174  %op1 = load <32 x i16>, ptr %a
175  %ins = insertelement <32 x i16> poison, i16 15, i64 0
176  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
177  %res = ashr <32 x i16> %op1, %op2
178  store <32 x i16> %res, ptr %a
179  ret void
180}
181
; ashr <16 x i32> by a splat of 31: expected to select the immediate form `asr z0.s, z0.s, #31`.
182define void @ashr_v16i32(ptr %a) #0 {
183; CHECK-LABEL: ashr_v16i32:
184; CHECK:       // %bb.0:
185; CHECK-NEXT:    ptrue p0.s, vl16
186; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
187; CHECK-NEXT:    asr z0.s, z0.s, #31
188; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
189; CHECK-NEXT:    ret
190  %op1 = load <16 x i32>, ptr %a
191  %ins = insertelement <16 x i32> poison, i32 31, i64 0
192  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
193  %res = ashr <16 x i32> %op1, %op2
194  store <16 x i32> %res, ptr %a
195  ret void
196}
197
; ashr <8 x i64> by a splat of 63: expected to select the immediate form `asr z0.d, z0.d, #63`.
198define void @ashr_v8i64(ptr %a) #0 {
199; CHECK-LABEL: ashr_v8i64:
200; CHECK:       // %bb.0:
201; CHECK-NEXT:    ptrue p0.d, vl8
202; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
203; CHECK-NEXT:    asr z0.d, z0.d, #63
204; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
205; CHECK-NEXT:    ret
206  %op1 = load <8 x i64>, ptr %a
207  %ins = insertelement <8 x i64> poison, i64 63, i64 0
208  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
209  %res = ashr <8 x i64> %op1, %op2
210  store <8 x i64> %res, ptr %a
211  ret void
212}
213
214;
215; ICMP
216;
217
; icmp eq <64 x i8> against a splat of 7, sign-extended and stored: expected to select `cmpeq p1.b, p0/z, z0.b, #7`.
218define void @icmp_eq_v64i8(ptr %a) #0 {
219; CHECK-LABEL: icmp_eq_v64i8:
220; CHECK:       // %bb.0:
221; CHECK-NEXT:    ptrue p0.b, vl64
222; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
223; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, #7
224; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
225; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
226; CHECK-NEXT:    ret
227  %op1 = load <64 x i8>, ptr %a
228  %ins = insertelement <64 x i8> poison, i8 7, i64 0
229  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
230  %cmp = icmp eq <64 x i8> %op1, %op2
231  %res = sext <64 x i1> %cmp to <64 x i8>
232  store <64 x i8> %res, ptr %a
233  ret void
234}
235
; icmp sge <32 x i16> against a splat of 15, sign-extended and stored: expected to select `cmpge p1.h, p0/z, z0.h, #15`.
236define void @icmp_sge_v32i16(ptr %a) #0 {
237; CHECK-LABEL: icmp_sge_v32i16:
238; CHECK:       // %bb.0:
239; CHECK-NEXT:    ptrue p0.h, vl32
240; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
241; CHECK-NEXT:    cmpge p1.h, p0/z, z0.h, #15
242; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
243; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
244; CHECK-NEXT:    ret
245  %op1 = load <32 x i16>, ptr %a
246  %ins = insertelement <32 x i16> poison, i16 15, i64 0
247  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
248  %cmp = icmp sge <32 x i16> %op1, %op2
249  %res = sext <32 x i1> %cmp to <32 x i16>
250  store <32 x i16> %res, ptr %a
251  ret void
252}
253
; icmp sgt <16 x i32> against a splat of -16, sign-extended and stored: expected to select `cmpgt p1.s, p0/z, z0.s, #-16`.
254define void @icmp_sgt_v16i32(ptr %a) #0 {
255; CHECK-LABEL: icmp_sgt_v16i32:
256; CHECK:       // %bb.0:
257; CHECK-NEXT:    ptrue p0.s, vl16
258; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
259; CHECK-NEXT:    cmpgt p1.s, p0/z, z0.s, #-16
260; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
261; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
262; CHECK-NEXT:    ret
263  %op1 = load <16 x i32>, ptr %a
264  %ins = insertelement <16 x i32> poison, i32 -16, i64 0
265  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
266  %cmp = icmp sgt <16 x i32> %op1, %op2
267  %res = sext <16 x i1> %cmp to <16 x i32>
268  store <16 x i32> %res, ptr %a
269  ret void
270}
271
; icmp ult <8 x i64> against a splat of 63, sign-extended and stored: expected to select `cmplo p1.d, p0/z, z0.d, #63`.
272define void @icmp_ult_v8i64(ptr %a) #0 {
273; CHECK-LABEL: icmp_ult_v8i64:
274; CHECK:       // %bb.0:
275; CHECK-NEXT:    ptrue p0.d, vl8
276; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
277; CHECK-NEXT:    cmplo p1.d, p0/z, z0.d, #63
278; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
279; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
280; CHECK-NEXT:    ret
281  %op1 = load <8 x i64>, ptr %a
282  %ins = insertelement <8 x i64> poison, i64 63, i64 0
283  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
284  %cmp = icmp ult <8 x i64> %op1, %op2
285  %res = sext <8 x i1> %cmp to <8 x i64>
286  store <8 x i64> %res, ptr %a
287  ret void
288}
289
290;
291; LSHR
292;
293
; lshr <64 x i8> by a splat of 7: expected to select the immediate form `lsr z0.b, z0.b, #7`.
294define void @lshr_v64i8(ptr %a) #0 {
295; CHECK-LABEL: lshr_v64i8:
296; CHECK:       // %bb.0:
297; CHECK-NEXT:    ptrue p0.b, vl64
298; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
299; CHECK-NEXT:    lsr z0.b, z0.b, #7
300; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
301; CHECK-NEXT:    ret
302  %op1 = load <64 x i8>, ptr %a
303  %ins = insertelement <64 x i8> poison, i8 7, i64 0
304  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
305  %res = lshr <64 x i8> %op1, %op2
306  store <64 x i8> %res, ptr %a
307  ret void
308}
309
; lshr <32 x i16> by a splat of 15: expected to select the immediate form `lsr z0.h, z0.h, #15`.
310define void @lshr_v32i16(ptr %a) #0 {
311; CHECK-LABEL: lshr_v32i16:
312; CHECK:       // %bb.0:
313; CHECK-NEXT:    ptrue p0.h, vl32
314; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
315; CHECK-NEXT:    lsr z0.h, z0.h, #15
316; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
317; CHECK-NEXT:    ret
318  %op1 = load <32 x i16>, ptr %a
319  %ins = insertelement <32 x i16> poison, i16 15, i64 0
320  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
321  %res = lshr <32 x i16> %op1, %op2
322  store <32 x i16> %res, ptr %a
323  ret void
324}
325
; lshr <16 x i32> by a splat of 31: expected to select the immediate form `lsr z0.s, z0.s, #31`.
326define void @lshr_v16i32(ptr %a) #0 {
327; CHECK-LABEL: lshr_v16i32:
328; CHECK:       // %bb.0:
329; CHECK-NEXT:    ptrue p0.s, vl16
330; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
331; CHECK-NEXT:    lsr z0.s, z0.s, #31
332; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
333; CHECK-NEXT:    ret
334  %op1 = load <16 x i32>, ptr %a
335  %ins = insertelement <16 x i32> poison, i32 31, i64 0
336  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
337  %res = lshr <16 x i32> %op1, %op2
338  store <16 x i32> %res, ptr %a
339  ret void
340}
341
; lshr <8 x i64> by a splat of 63: expected to select the immediate form `lsr z0.d, z0.d, #63`.
342define void @lshr_v8i64(ptr %a) #0 {
343; CHECK-LABEL: lshr_v8i64:
344; CHECK:       // %bb.0:
345; CHECK-NEXT:    ptrue p0.d, vl8
346; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
347; CHECK-NEXT:    lsr z0.d, z0.d, #63
348; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
349; CHECK-NEXT:    ret
350  %op1 = load <8 x i64>, ptr %a
351  %ins = insertelement <8 x i64> poison, i64 63, i64 0
352  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
353  %res = lshr <8 x i64> %op1, %op2
354  store <8 x i64> %res, ptr %a
355  ret void
356}
357
358;
359; MUL
360;
361
; mul <64 x i8> by a splat of 7: expected to select the immediate form `mul z0.b, z0.b, #7`.
362define void @mul_v64i8(ptr %a) #0 {
363; CHECK-LABEL: mul_v64i8:
364; CHECK:       // %bb.0:
365; CHECK-NEXT:    ptrue p0.b, vl64
366; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
367; CHECK-NEXT:    mul z0.b, z0.b, #7
368; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
369; CHECK-NEXT:    ret
370  %op1 = load <64 x i8>, ptr %a
371  %ins = insertelement <64 x i8> poison, i8 7, i64 0
372  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
373  %res = mul <64 x i8> %op1, %op2
374  store <64 x i8> %res, ptr %a
375  ret void
376}
377
; mul <32 x i16> by a splat of 15: expected to select the immediate form `mul z0.h, z0.h, #15`.
378define void @mul_v32i16(ptr %a) #0 {
379; CHECK-LABEL: mul_v32i16:
380; CHECK:       // %bb.0:
381; CHECK-NEXT:    ptrue p0.h, vl32
382; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
383; CHECK-NEXT:    mul z0.h, z0.h, #15
384; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
385; CHECK-NEXT:    ret
386  %op1 = load <32 x i16>, ptr %a
387  %ins = insertelement <32 x i16> poison, i16 15, i64 0
388  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
389  %res = mul <32 x i16> %op1, %op2
390  store <32 x i16> %res, ptr %a
391  ret void
392}
393
; mul <16 x i32> by a splat of 31: expected to select the immediate form `mul z0.s, z0.s, #31`.
394define void @mul_v16i32(ptr %a) #0 {
395; CHECK-LABEL: mul_v16i32:
396; CHECK:       // %bb.0:
397; CHECK-NEXT:    ptrue p0.s, vl16
398; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
399; CHECK-NEXT:    mul z0.s, z0.s, #31
400; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
401; CHECK-NEXT:    ret
402  %op1 = load <16 x i32>, ptr %a
403  %ins = insertelement <16 x i32> poison, i32 31, i64 0
404  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
405  %res = mul <16 x i32> %op1, %op2
406  store <16 x i32> %res, ptr %a
407  ret void
408}
409
; mul <8 x i64> by a splat of 63: expected to select the immediate form `mul z0.d, z0.d, #63`.
410define void @mul_v8i64(ptr %a) #0 {
411; CHECK-LABEL: mul_v8i64:
412; CHECK:       // %bb.0:
413; CHECK-NEXT:    ptrue p0.d, vl8
414; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
415; CHECK-NEXT:    mul z0.d, z0.d, #63
416; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
417; CHECK-NEXT:    ret
418  %op1 = load <8 x i64>, ptr %a
419  %ins = insertelement <8 x i64> poison, i64 63, i64 0
420  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
421  %res = mul <8 x i64> %op1, %op2
422  store <8 x i64> %res, ptr %a
423  ret void
424}
425
426;
427; OR
428;
429
; or <64 x i8> with a splat of 7: expected to select the immediate form `orr z0.b, z0.b, #0x7`.
430define void @or_v64i8(ptr %a) #0 {
431; CHECK-LABEL: or_v64i8:
432; CHECK:       // %bb.0:
433; CHECK-NEXT:    ptrue p0.b, vl64
434; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
435; CHECK-NEXT:    orr z0.b, z0.b, #0x7
436; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
437; CHECK-NEXT:    ret
438  %op1 = load <64 x i8>, ptr %a
439  %ins = insertelement <64 x i8> poison, i8 7, i64 0
440  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
441  %res = or <64 x i8> %op1, %op2
442  store <64 x i8> %res, ptr %a
443  ret void
444}
445
; or <32 x i16> with a splat of 15: expected to select the immediate form `orr z0.h, z0.h, #0xf`.
446define void @or_v32i16(ptr %a) #0 {
447; CHECK-LABEL: or_v32i16:
448; CHECK:       // %bb.0:
449; CHECK-NEXT:    ptrue p0.h, vl32
450; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
451; CHECK-NEXT:    orr z0.h, z0.h, #0xf
452; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
453; CHECK-NEXT:    ret
454  %op1 = load <32 x i16>, ptr %a
455  %ins = insertelement <32 x i16> poison, i16 15, i64 0
456  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
457  %res = or <32 x i16> %op1, %op2
458  store <32 x i16> %res, ptr %a
459  ret void
460}
461
; or <16 x i32> with a splat of 31: expected to select the immediate form `orr z0.s, z0.s, #0x1f`.
462define void @or_v16i32(ptr %a) #0 {
463; CHECK-LABEL: or_v16i32:
464; CHECK:       // %bb.0:
465; CHECK-NEXT:    ptrue p0.s, vl16
466; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
467; CHECK-NEXT:    orr z0.s, z0.s, #0x1f
468; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
469; CHECK-NEXT:    ret
470  %op1 = load <16 x i32>, ptr %a
471  %ins = insertelement <16 x i32> poison, i32 31, i64 0
472  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
473  %res = or <16 x i32> %op1, %op2
474  store <16 x i32> %res, ptr %a
475  ret void
476}
477
; or <8 x i64> with a splat of 63: expected to select the immediate form `orr z0.d, z0.d, #0x3f`.
478define void @or_v8i64(ptr %a) #0 {
479; CHECK-LABEL: or_v8i64:
480; CHECK:       // %bb.0:
481; CHECK-NEXT:    ptrue p0.d, vl8
482; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
483; CHECK-NEXT:    orr z0.d, z0.d, #0x3f
484; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
485; CHECK-NEXT:    ret
486  %op1 = load <8 x i64>, ptr %a
487  %ins = insertelement <8 x i64> poison, i64 63, i64 0
488  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
489  %res = or <8 x i64> %op1, %op2
490  store <8 x i64> %res, ptr %a
491  ret void
492}
493
494;
495; SHL
496;
497
; shl <64 x i8> by a splat of 7: expected to select the immediate form `lsl z0.b, z0.b, #7`.
498define void @shl_v64i8(ptr %a) #0 {
499; CHECK-LABEL: shl_v64i8:
500; CHECK:       // %bb.0:
501; CHECK-NEXT:    ptrue p0.b, vl64
502; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
503; CHECK-NEXT:    lsl z0.b, z0.b, #7
504; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
505; CHECK-NEXT:    ret
506  %op1 = load <64 x i8>, ptr %a
507  %ins = insertelement <64 x i8> poison, i8 7, i64 0
508  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
509  %res = shl <64 x i8> %op1, %op2
510  store <64 x i8> %res, ptr %a
511  ret void
512}
513
; shl <32 x i16> by a splat of 15: expected to select the immediate form `lsl z0.h, z0.h, #15`.
514define void @shl_v32i16(ptr %a) #0 {
515; CHECK-LABEL: shl_v32i16:
516; CHECK:       // %bb.0:
517; CHECK-NEXT:    ptrue p0.h, vl32
518; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
519; CHECK-NEXT:    lsl z0.h, z0.h, #15
520; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
521; CHECK-NEXT:    ret
522  %op1 = load <32 x i16>, ptr %a
523  %ins = insertelement <32 x i16> poison, i16 15, i64 0
524  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
525  %res = shl <32 x i16> %op1, %op2
526  store <32 x i16> %res, ptr %a
527  ret void
528}
529
; shl <16 x i32> by a splat of 31: expected to select the immediate form `lsl z0.s, z0.s, #31`.
530define void @shl_v16i32(ptr %a) #0 {
531; CHECK-LABEL: shl_v16i32:
532; CHECK:       // %bb.0:
533; CHECK-NEXT:    ptrue p0.s, vl16
534; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
535; CHECK-NEXT:    lsl z0.s, z0.s, #31
536; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
537; CHECK-NEXT:    ret
538  %op1 = load <16 x i32>, ptr %a
539  %ins = insertelement <16 x i32> poison, i32 31, i64 0
540  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
541  %res = shl <16 x i32> %op1, %op2
542  store <16 x i32> %res, ptr %a
543  ret void
544}
545
; shl <8 x i64> by a splat of 63: expected to select the immediate form `lsl z0.d, z0.d, #63`.
546define void @shl_v8i64(ptr %a) #0 {
547; CHECK-LABEL: shl_v8i64:
548; CHECK:       // %bb.0:
549; CHECK-NEXT:    ptrue p0.d, vl8
550; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
551; CHECK-NEXT:    lsl z0.d, z0.d, #63
552; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
553; CHECK-NEXT:    ret
554  %op1 = load <8 x i64>, ptr %a
555  %ins = insertelement <8 x i64> poison, i64 63, i64 0
556  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
557  %res = shl <8 x i64> %op1, %op2
558  store <8 x i64> %res, ptr %a
559  ret void
560}
561
562;
563; SMAX
564;
565
; llvm.smax on <64 x i8> with a splat of 7: expected to select the immediate form `smax z0.b, z0.b, #7`.
566define void @smax_v64i8(ptr %a) #0 {
567; CHECK-LABEL: smax_v64i8:
568; CHECK:       // %bb.0:
569; CHECK-NEXT:    ptrue p0.b, vl64
570; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
571; CHECK-NEXT:    smax z0.b, z0.b, #7
572; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
573; CHECK-NEXT:    ret
574  %op1 = load <64 x i8>, ptr %a
575  %ins = insertelement <64 x i8> poison, i8 7, i64 0
576  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
577  %res = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
578  store <64 x i8> %res, ptr %a
579  ret void
580}
581
; llvm.smax on <32 x i16> with a splat of 15: expected to select the immediate form `smax z0.h, z0.h, #15`.
582define void @smax_v32i16(ptr %a) #0 {
583; CHECK-LABEL: smax_v32i16:
584; CHECK:       // %bb.0:
585; CHECK-NEXT:    ptrue p0.h, vl32
586; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
587; CHECK-NEXT:    smax z0.h, z0.h, #15
588; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
589; CHECK-NEXT:    ret
590  %op1 = load <32 x i16>, ptr %a
591  %ins = insertelement <32 x i16> poison, i16 15, i64 0
592  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
593  %res = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
594  store <32 x i16> %res, ptr %a
595  ret void
596}
597
; llvm.smax on <16 x i32> with a splat of 31: expected to select the immediate form `smax z0.s, z0.s, #31`.
598define void @smax_v16i32(ptr %a) #0 {
599; CHECK-LABEL: smax_v16i32:
600; CHECK:       // %bb.0:
601; CHECK-NEXT:    ptrue p0.s, vl16
602; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
603; CHECK-NEXT:    smax z0.s, z0.s, #31
604; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
605; CHECK-NEXT:    ret
606  %op1 = load <16 x i32>, ptr %a
607  %ins = insertelement <16 x i32> poison, i32 31, i64 0
608  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
609  %res = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
610  store <16 x i32> %res, ptr %a
611  ret void
612}
613
; llvm.smax on <8 x i64> with a splat of 63: expected to select the immediate form `smax z0.d, z0.d, #63`.
614define void @smax_v8i64(ptr %a) #0 {
615; CHECK-LABEL: smax_v8i64:
616; CHECK:       // %bb.0:
617; CHECK-NEXT:    ptrue p0.d, vl8
618; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
619; CHECK-NEXT:    smax z0.d, z0.d, #63
620; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
621; CHECK-NEXT:    ret
622  %op1 = load <8 x i64>, ptr %a
623  %ins = insertelement <8 x i64> poison, i64 63, i64 0
624  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
625  %res = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
626  store <8 x i64> %res, ptr %a
627  ret void
628}
629
630;
631; SMIN
632;
633
; llvm.smin on <64 x i8> with a splat of 7: expected to select the immediate form `smin z0.b, z0.b, #7`.
634define void @smin_v64i8(ptr %a) #0 {
635; CHECK-LABEL: smin_v64i8:
636; CHECK:       // %bb.0:
637; CHECK-NEXT:    ptrue p0.b, vl64
638; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
639; CHECK-NEXT:    smin z0.b, z0.b, #7
640; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
641; CHECK-NEXT:    ret
642  %op1 = load <64 x i8>, ptr %a
643  %ins = insertelement <64 x i8> poison, i8 7, i64 0
644  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
645  %res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
646  store <64 x i8> %res, ptr %a
647  ret void
648}
649
; llvm.smin on <32 x i16> with a splat of 15: expected to select the immediate form `smin z0.h, z0.h, #15`.
650define void @smin_v32i16(ptr %a) #0 {
651; CHECK-LABEL: smin_v32i16:
652; CHECK:       // %bb.0:
653; CHECK-NEXT:    ptrue p0.h, vl32
654; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
655; CHECK-NEXT:    smin z0.h, z0.h, #15
656; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
657; CHECK-NEXT:    ret
658  %op1 = load <32 x i16>, ptr %a
659  %ins = insertelement <32 x i16> poison, i16 15, i64 0
660  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
661  %res = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
662  store <32 x i16> %res, ptr %a
663  ret void
664}
665
; llvm.smin on <16 x i32> with a splat of 31: expected to select the immediate form `smin z0.s, z0.s, #31`.
666define void @smin_v16i32(ptr %a) #0 {
667; CHECK-LABEL: smin_v16i32:
668; CHECK:       // %bb.0:
669; CHECK-NEXT:    ptrue p0.s, vl16
670; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
671; CHECK-NEXT:    smin z0.s, z0.s, #31
672; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
673; CHECK-NEXT:    ret
674  %op1 = load <16 x i32>, ptr %a
675  %ins = insertelement <16 x i32> poison, i32 31, i64 0
676  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
677  %res = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
678  store <16 x i32> %res, ptr %a
679  ret void
680}
681
; llvm.smin on <8 x i64> with a splat of 63: expected to select the immediate form `smin z0.d, z0.d, #63`.
682define void @smin_v8i64(ptr %a) #0 {
683; CHECK-LABEL: smin_v8i64:
684; CHECK:       // %bb.0:
685; CHECK-NEXT:    ptrue p0.d, vl8
686; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
687; CHECK-NEXT:    smin z0.d, z0.d, #63
688; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
689; CHECK-NEXT:    ret
690  %op1 = load <8 x i64>, ptr %a
691  %ins = insertelement <8 x i64> poison, i64 63, i64 0
692  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
693  %res = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
694  store <8 x i64> %res, ptr %a
695  ret void
696}
697
698;
699; SUB
700;
701
; sub of a splat of 7 from <64 x i8>: expected to select the immediate form `sub z0.b, z0.b, #7`.
702define void @sub_v64i8(ptr %a) #0 {
703; CHECK-LABEL: sub_v64i8:
704; CHECK:       // %bb.0:
705; CHECK-NEXT:    ptrue p0.b, vl64
706; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
707; CHECK-NEXT:    sub z0.b, z0.b, #7 // =0x7
708; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
709; CHECK-NEXT:    ret
710  %op1 = load <64 x i8>, ptr %a
711  %ins = insertelement <64 x i8> poison, i8 7, i64 0
712  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
713  %res = sub <64 x i8> %op1, %op2
714  store <64 x i8> %res, ptr %a
715  ret void
716}
717
; sub of a splat of 15 from <32 x i16>: expected to select the immediate form `sub z0.h, z0.h, #15`.
718define void @sub_v32i16(ptr %a) #0 {
719; CHECK-LABEL: sub_v32i16:
720; CHECK:       // %bb.0:
721; CHECK-NEXT:    ptrue p0.h, vl32
722; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
723; CHECK-NEXT:    sub z0.h, z0.h, #15 // =0xf
724; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
725; CHECK-NEXT:    ret
726  %op1 = load <32 x i16>, ptr %a
727  %ins = insertelement <32 x i16> poison, i16 15, i64 0
728  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
729  %res = sub <32 x i16> %op1, %op2
730  store <32 x i16> %res, ptr %a
731  ret void
732}
733
; sub of a splat of 31 from <16 x i32>: expected to select the immediate form `sub z0.s, z0.s, #31`.
734define void @sub_v16i32(ptr %a) #0 {
735; CHECK-LABEL: sub_v16i32:
736; CHECK:       // %bb.0:
737; CHECK-NEXT:    ptrue p0.s, vl16
738; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
739; CHECK-NEXT:    sub z0.s, z0.s, #31 // =0x1f
740; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
741; CHECK-NEXT:    ret
742  %op1 = load <16 x i32>, ptr %a
743  %ins = insertelement <16 x i32> poison, i32 31, i64 0
744  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
745  %res = sub <16 x i32> %op1, %op2
746  store <16 x i32> %res, ptr %a
747  ret void
748}
749
; sub of a splat of 63 from <8 x i64>: expected to select the immediate form `sub z0.d, z0.d, #63`.
750define void @sub_v8i64(ptr %a) #0 {
751; CHECK-LABEL: sub_v8i64:
752; CHECK:       // %bb.0:
753; CHECK-NEXT:    ptrue p0.d, vl8
754; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
755; CHECK-NEXT:    sub z0.d, z0.d, #63 // =0x3f
756; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
757; CHECK-NEXT:    ret
758  %op1 = load <8 x i64>, ptr %a
759  %ins = insertelement <8 x i64> poison, i64 63, i64 0
760  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
761  %res = sub <8 x i64> %op1, %op2
762  store <8 x i64> %res, ptr %a
763  ret void
764}
765
766;
767; UMAX
768;
769
; llvm.umax on <64 x i8> with a splat of 7: expected to select the immediate form `umax z0.b, z0.b, #7`.
770define void @umax_v64i8(ptr %a) #0 {
771; CHECK-LABEL: umax_v64i8:
772; CHECK:       // %bb.0:
773; CHECK-NEXT:    ptrue p0.b, vl64
774; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
775; CHECK-NEXT:    umax z0.b, z0.b, #7
776; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
777; CHECK-NEXT:    ret
778  %op1 = load <64 x i8>, ptr %a
779  %ins = insertelement <64 x i8> poison, i8 7, i64 0
780  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
781  %res = call <64 x i8> @llvm.umax.v64i8(<64 x i8> %op1, <64 x i8> %op2)
782  store <64 x i8> %res, ptr %a
783  ret void
784}
785
; llvm.umax on <32 x i16> with a splat of 15: expected to select the immediate form `umax z0.h, z0.h, #15`.
786define void @umax_v32i16(ptr %a) #0 {
787; CHECK-LABEL: umax_v32i16:
788; CHECK:       // %bb.0:
789; CHECK-NEXT:    ptrue p0.h, vl32
790; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
791; CHECK-NEXT:    umax z0.h, z0.h, #15
792; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
793; CHECK-NEXT:    ret
794  %op1 = load <32 x i16>, ptr %a
795  %ins = insertelement <32 x i16> poison, i16 15, i64 0
796  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
797  %res = call <32 x i16> @llvm.umax.v32i16(<32 x i16> %op1, <32 x i16> %op2)
798  store <32 x i16> %res, ptr %a
799  ret void
800}
801
; llvm.umax on <16 x i32> with a splat of 31: expected to select the immediate form `umax z0.s, z0.s, #31`.
802define void @umax_v16i32(ptr %a) #0 {
803; CHECK-LABEL: umax_v16i32:
804; CHECK:       // %bb.0:
805; CHECK-NEXT:    ptrue p0.s, vl16
806; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
807; CHECK-NEXT:    umax z0.s, z0.s, #31
808; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
809; CHECK-NEXT:    ret
810  %op1 = load <16 x i32>, ptr %a
811  %ins = insertelement <16 x i32> poison, i32 31, i64 0
812  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
813  %res = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %op1, <16 x i32> %op2)
814  store <16 x i32> %res, ptr %a
815  ret void
816}
817
; llvm.umax on <8 x i64> with a splat of 63: expected to select the immediate form `umax z0.d, z0.d, #63`.
818define void @umax_v8i64(ptr %a) #0 {
819; CHECK-LABEL: umax_v8i64:
820; CHECK:       // %bb.0:
821; CHECK-NEXT:    ptrue p0.d, vl8
822; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
823; CHECK-NEXT:    umax z0.d, z0.d, #63
824; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
825; CHECK-NEXT:    ret
826  %op1 = load <8 x i64>, ptr %a
827  %ins = insertelement <8 x i64> poison, i64 63, i64 0
828  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
829  %res = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %op1, <8 x i64> %op2)
830  store <8 x i64> %res, ptr %a
831  ret void
832}
833
834;
835; UMIN
836;
837
; llvm.umin on <64 x i8> with a splat of 7: expected to select the immediate form `umin z0.b, z0.b, #7`.
838define void @umin_v64i8(ptr %a) #0 {
839; CHECK-LABEL: umin_v64i8:
840; CHECK:       // %bb.0:
841; CHECK-NEXT:    ptrue p0.b, vl64
842; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
843; CHECK-NEXT:    umin z0.b, z0.b, #7
844; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
845; CHECK-NEXT:    ret
846  %op1 = load <64 x i8>, ptr %a
847  %ins = insertelement <64 x i8> poison, i8 7, i64 0
848  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
849  %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2)
850  store <64 x i8> %res, ptr %a
851  ret void
852}
853
; llvm.umin on <32 x i16> with a splat of 15: expected to select the immediate form `umin z0.h, z0.h, #15`.
854define void @umin_v32i16(ptr %a) #0 {
855; CHECK-LABEL: umin_v32i16:
856; CHECK:       // %bb.0:
857; CHECK-NEXT:    ptrue p0.h, vl32
858; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
859; CHECK-NEXT:    umin z0.h, z0.h, #15
860; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
861; CHECK-NEXT:    ret
862  %op1 = load <32 x i16>, ptr %a
863  %ins = insertelement <32 x i16> poison, i16 15, i64 0
864  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
865  %res = call <32 x i16> @llvm.umin.v32i16(<32 x i16> %op1, <32 x i16> %op2)
866  store <32 x i16> %res, ptr %a
867  ret void
868}
869
; llvm.umin on <16 x i32> with a splat of 31: expected to select the immediate form `umin z0.s, z0.s, #31`.
870define void @umin_v16i32(ptr %a) #0 {
871; CHECK-LABEL: umin_v16i32:
872; CHECK:       // %bb.0:
873; CHECK-NEXT:    ptrue p0.s, vl16
874; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
875; CHECK-NEXT:    umin z0.s, z0.s, #31
876; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
877; CHECK-NEXT:    ret
878  %op1 = load <16 x i32>, ptr %a
879  %ins = insertelement <16 x i32> poison, i32 31, i64 0
880  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
881  %res = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %op1, <16 x i32> %op2)
882  store <16 x i32> %res, ptr %a
883  ret void
884}
885
; llvm.umin on <8 x i64> with a splat of 63: expected to select the immediate form `umin z0.d, z0.d, #63`.
886define void @umin_v8i64(ptr %a) #0 {
887; CHECK-LABEL: umin_v8i64:
888; CHECK:       // %bb.0:
889; CHECK-NEXT:    ptrue p0.d, vl8
890; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
891; CHECK-NEXT:    umin z0.d, z0.d, #63
892; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
893; CHECK-NEXT:    ret
894  %op1 = load <8 x i64>, ptr %a
895  %ins = insertelement <8 x i64> poison, i64 63, i64 0
896  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
897  %res = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %op1, <8 x i64> %op2)
898  store <8 x i64> %res, ptr %a
899  ret void
900}
901
902;
903; XOR
904;
905
; xor <64 x i8> with a splat of 7: expected to select the immediate form `eor z0.b, z0.b, #0x7`.
906define void @xor_v64i8(ptr %a) #0 {
907; CHECK-LABEL: xor_v64i8:
908; CHECK:       // %bb.0:
909; CHECK-NEXT:    ptrue p0.b, vl64
910; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
911; CHECK-NEXT:    eor z0.b, z0.b, #0x7
912; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
913; CHECK-NEXT:    ret
914  %op1 = load <64 x i8>, ptr %a
915  %ins = insertelement <64 x i8> poison, i8 7, i64 0
916  %op2 = shufflevector <64 x i8> %ins, <64 x i8> poison, <64 x i32> zeroinitializer
917  %res = xor <64 x i8> %op1, %op2
918  store <64 x i8> %res, ptr %a
919  ret void
920}
921
; xor <32 x i16> with a splat of 15: expected to select the immediate form `eor z0.h, z0.h, #0xf`.
922define void @xor_v32i16(ptr %a) #0 {
923; CHECK-LABEL: xor_v32i16:
924; CHECK:       // %bb.0:
925; CHECK-NEXT:    ptrue p0.h, vl32
926; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
927; CHECK-NEXT:    eor z0.h, z0.h, #0xf
928; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
929; CHECK-NEXT:    ret
930  %op1 = load <32 x i16>, ptr %a
931  %ins = insertelement <32 x i16> poison, i16 15, i64 0
932  %op2 = shufflevector <32 x i16> %ins, <32 x i16> poison, <32 x i32> zeroinitializer
933  %res = xor <32 x i16> %op1, %op2
934  store <32 x i16> %res, ptr %a
935  ret void
936}
937
; xor <16 x i32> with a splat of 31: expected to select the immediate form `eor z0.s, z0.s, #0x1f`.
938define void @xor_v16i32(ptr %a) #0 {
939; CHECK-LABEL: xor_v16i32:
940; CHECK:       // %bb.0:
941; CHECK-NEXT:    ptrue p0.s, vl16
942; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
943; CHECK-NEXT:    eor z0.s, z0.s, #0x1f
944; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
945; CHECK-NEXT:    ret
946  %op1 = load <16 x i32>, ptr %a
947  %ins = insertelement <16 x i32> poison, i32 31, i64 0
948  %op2 = shufflevector <16 x i32> %ins, <16 x i32> poison, <16 x i32> zeroinitializer
949  %res = xor <16 x i32> %op1, %op2
950  store <16 x i32> %res, ptr %a
951  ret void
952}
953
; xor <8 x i64> with a splat of 63: expected to select the immediate form `eor z0.d, z0.d, #0x3f`.
954define void @xor_v8i64(ptr %a) #0 {
955; CHECK-LABEL: xor_v8i64:
956; CHECK:       // %bb.0:
957; CHECK-NEXT:    ptrue p0.d, vl8
958; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
959; CHECK-NEXT:    eor z0.d, z0.d, #0x3f
960; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
961; CHECK-NEXT:    ret
962  %op1 = load <8 x i64>, ptr %a
963  %ins = insertelement <8 x i64> poison, i64 63, i64 0
964  %op2 = shufflevector <8 x i64> %ins, <8 x i64> poison, <8 x i32> zeroinitializer
965  %res = xor <8 x i64> %op1, %op2
966  store <8 x i64> %res, ptr %a
967  ret void
968}
969
970declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
971declare <32 x i16> @llvm.smax.v32i16(<32 x i16>, <32 x i16>)
972declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
973declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>)
974
975declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
976declare <32 x i16> @llvm.smin.v32i16(<32 x i16>, <32 x i16>)
977declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
978declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>)
979
980declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
981declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>)
982declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
983declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>)
984
985declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
986declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>)
987declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
988declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>)
989
990attributes #0 = { "target-features"="+sve" }
991