; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll (revision ab7110bcd6b137803935508de8c9f6af377f9454)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; NOTE: the 2048-bit run reuses the GE_512 prefix: with min bits >= 512 the same single-op lowering applies.

target triple = "aarch64-unknown-linux-gnu"

;
; ICMP EQ
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %cmp = icmp eq <8 x i8> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %sext
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %cmp = icmp eq <16 x i8> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %sext
}

; 256-bit vector: one predicated SVE compare; the sext is materialized as a
; predicated mov of -1.
define void @icmp_eq_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %cmp = icmp eq <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, ptr %a
  ret void
}

; 512-bit vector: split into two halves when min vector bits is 256,
; single predicated op when min vector bits is >= 512.
define void @icmp_eq_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_256-NEXT:    cmpeq p2.b, p0/z, z2.b, z3.b
; VBITS_GE_256-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.b, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_512-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %cmp = icmp eq <64 x i8> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i8>
  store <64 x i8> %sext, ptr %a
  ret void
}

; 1024-bit vector, single predicated compare (vscale_range(8,0) => min 1024-bit registers).
define void @icmp_eq_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %cmp = icmp eq <128 x i8> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i8>
  store <128 x i8> %sext, ptr %a
  ret void
}

; 2048-bit vector, single predicated compare (vscale_range(16,0) => min 2048-bit registers).
define void @icmp_eq_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %cmp = icmp eq <256 x i8> %op1, %op2
  %sext = sext <256 x i1> %cmp to <256 x i8>
  store <256 x i8> %sext, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %cmp = icmp eq <4 x i16> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %sext
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %cmp = icmp eq <8 x i16> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %sext
}

; 256-bit vector: single predicated SVE compare.
define void @icmp_eq_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %cmp = icmp eq <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, ptr %a
  ret void
}

; 512-bit vector: two halves at min 256 bits, one predicated op at min >= 512 bits.
define void @icmp_eq_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT:    cmpeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %cmp = icmp eq <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, ptr %a
  ret void
}

; 1024-bit vector, single predicated compare.
define void @icmp_eq_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %cmp = icmp eq <64 x i16> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i16>
  store <64 x i16> %sext, ptr %a
  ret void
}

; 2048-bit vector, single predicated compare.
define void @icmp_eq_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %cmp = icmp eq <128 x i16> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i16>
  store <128 x i16> %sext, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %cmp = icmp eq <2 x i32> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %sext
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %cmp = icmp eq <4 x i32> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %sext
}

; 256-bit vector: single predicated SVE compare.
define void @icmp_eq_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %cmp = icmp eq <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, ptr %a
  ret void
}

; 512-bit vector: two halves at min 256 bits, one predicated op at min >= 512 bits.
define void @icmp_eq_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT:    cmpeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %cmp = icmp eq <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, ptr %a
  ret void
}

; 1024-bit vector, single predicated compare.
define void @icmp_eq_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %cmp = icmp eq <32 x i32> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i32>
  store <32 x i32> %sext, ptr %a
  ret void
}

; 2048-bit vector, single predicated compare.
define void @icmp_eq_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %cmp = icmp eq <64 x i32> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i32>
  store <64 x i32> %sext, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq d0, d0, d1
; CHECK-NEXT:    ret
  %cmp = icmp eq <1 x i64> %op1, %op2
  %sext = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %sext
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %cmp = icmp eq <2 x i64> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %sext
}

; 256-bit vector: single predicated SVE compare.
define void @icmp_eq_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %cmp = icmp eq <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, ptr %a
  ret void
}

; 512-bit vector: two halves at min 256 bits, one predicated op at min >= 512 bits.
define void @icmp_eq_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT:    cmpeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %cmp = icmp eq <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, ptr %a
  ret void
}

; 1024-bit vector, single predicated compare.
define void @icmp_eq_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %cmp = icmp eq <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, ptr %a
  ret void
}

; 2048-bit vector, single predicated compare.
define void @icmp_eq_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %cmp = icmp eq <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, ptr %a
  ret void
}

;
; ICMP NE
;

define void @icmp_ne_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_ne_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %cmp = icmp ne <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, ptr %a
  ret void
}

;
; ICMP SGE
;

define void @icmp_sge_v32i16(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_sge_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %cmp = icmp sge <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, ptr %a
  ret void
}

;
; ICMP SGT
;

define void @icmp_sgt_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_sgt_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %cmp = icmp sgt <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, ptr %a
  ret void
}

;
; ICMP SLE
;

; sle has no dedicated SVE compare: it is lowered as cmpge with the operands swapped.
define void @icmp_sle_v16i32(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_sle_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpge p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %cmp = icmp sle <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, ptr %a
  ret void
}

;
; ICMP SLT
;

; slt is lowered as cmpgt with the operands swapped.
define void @icmp_slt_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_slt_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpgt p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %cmp = icmp slt <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, ptr %a
  ret void
}

;
; ICMP UGE
;

define void @icmp_uge_v8i64(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_uge_v8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphs p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %cmp = icmp uge <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, ptr %a
  ret void
}

;
; ICMP UGT
;

define void @icmp_ugt_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_ugt_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphi p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %cmp = icmp ugt <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, ptr %a
  ret void
}

;
; ICMP ULE
;

; ule is lowered as cmphs with the operands swapped.
define void @icmp_ule_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_ule_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphs p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %cmp = icmp ule <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, ptr %a
  ret void
}

;
; ICMP ULT
;

; ult is lowered as cmphi with the operands swapped.
define void @icmp_ult_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_ult_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphi p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %cmp = icmp ult <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, ptr %a
  ret void
}

; All functions require SVE.
attributes #0 = { "target-features"="+sve" }