; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
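; The 2048-bit run reuses the VBITS_GE_512 prefixes since, for the fixed-length
; types tested here, the expected code is identical for any vector length of
; 512 bits or more.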

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
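; The <8 x i1> mask arrives in a NEON register; it is sign-extended in place
; (shl #7 then cmlt #0) and the select is performed with bif.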
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.8b, v2.8b, #7
; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
  ret <8 x i8> %sel
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.16b, v2.16b, #7
; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
  ret <16 x i8> %sel
}

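; Vectors wider than 128 bits use an SVE predicated sequence: ptrue builds a
; governing predicate of the required fixed length, cmpeq produces the
; selection predicate, and sel picks between the operands.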
define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %mask = icmp eq <32 x i8> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2
  store <32 x i8> %sel, ptr %a
  ret void
}

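; When only 256-bit vectors are guaranteed (VBITS_GE_256), the 512-bit
; operation is split into two halves addressed via x8; with 512-bit vectors
; (VBITS_GE_512) a single predicated sequence suffices.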
define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_256-NEXT:    cmpeq p2.b, p0/z, z2.b, z3.b
; VBITS_GE_256-NEXT:    sel z0.b, p1, z0.b, z1.b
; VBITS_GE_256-NEXT:    sel z1.b, p2, z2.b, z3.b
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_512-NEXT:    sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %mask = icmp eq <64 x i8> %op1, %op2
  %sel = select <64 x i1> %mask, <64 x i8> %op1, <64 x i8> %op2
  store <64 x i8> %sel, ptr %a
  ret void
}

define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %mask = icmp eq <128 x i8> %op1, %op2
  %sel = select <128 x i1> %mask, <128 x i8> %op1, <128 x i8> %op2
  store <128 x i8> %sel, ptr %a
  ret void
}

define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %mask = icmp eq <256 x i8> %op1, %op2
  %sel = select <256 x i1> %mask, <256 x i8> %op1, <256 x i8> %op2
  store <256 x i8> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.4h, v2.4h, #15
; CHECK-NEXT:    cmlt v2.4h, v2.4h, #0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
  ret <4 x i16> %sel
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.8h, v2.8b, #0
; CHECK-NEXT:    shl v2.8h, v2.8h, #15
; CHECK-NEXT:    cmlt v2.8h, v2.8h, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
  ret <8 x i16> %sel
}

define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %mask = icmp eq <16 x i16> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2
  store <16 x i16> %sel, ptr %a
  ret void
}

define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT:    cmpeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT:    sel z0.h, p1, z0.h, z1.h
; VBITS_GE_256-NEXT:    sel z1.h, p2, z2.h, z3.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT:    sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %mask = icmp eq <32 x i16> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i16> %op1, <32 x i16> %op2
  store <32 x i16> %sel, ptr %a
  ret void
}

define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %mask = icmp eq <64 x i16> %op1, %op2
  %sel = select <64 x i1> %mask, <64 x i16> %op1, <64 x i16> %op2
  store <64 x i16> %sel, ptr %a
  ret void
}

define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %mask = icmp eq <128 x i16> %op1, %op2
  %sel = select <128 x i1> %mask, <128 x i16> %op1, <128 x i16> %op2
  store <128 x i16> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.2s, v2.2s, #31
; CHECK-NEXT:    cmlt v2.2s, v2.2s, #0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
  ret <2 x i32> %sel
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
; CHECK-NEXT:    shl v2.4s, v2.4s, #31
; CHECK-NEXT:    cmlt v2.4s, v2.4s, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
  ret <4 x i32> %sel
}

define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %mask = icmp eq <8 x i32> %op1, %op2
  %sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2
  store <8 x i32> %sel, ptr %a
  ret void
}

define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT:    cmpeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT:    sel z0.s, p1, z0.s, z1.s
; VBITS_GE_256-NEXT:    sel z1.s, p2, z2.s, z3.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT:    sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %mask = icmp eq <16 x i32> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x i32> %op1, <16 x i32> %op2
  store <16 x i32> %sel, ptr %a
  ret void
}

define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %mask = icmp eq <32 x i32> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i32> %op1, <32 x i32> %op2
  store <32 x i32> %sel, ptr %a
  ret void
}

define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %mask = icmp eq <64 x i32> %op1, %op2
  %sel = select <64 x i1> %mask, <64 x i32> %op1, <64 x i32> %op2
  store <64 x i32> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
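; The <1 x i1> mask is passed as a scalar, so it is materialised with
; tst/csetm and moved into a vector register before bif.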
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
  ret <1 x i64> %sel
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.2d, v2.2s, #0
; CHECK-NEXT:    shl v2.2d, v2.2d, #63
; CHECK-NEXT:    cmlt v2.2d, v2.2d, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
  ret <2 x i64> %sel
}

define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %mask = icmp eq <4 x i64> %op1, %op2
  %sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2
  store <4 x i64> %sel, ptr %a
  ret void
}

define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT:    cmpeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT:    sel z0.d, p1, z0.d, z1.d
; VBITS_GE_256-NEXT:    sel z1.d, p2, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT:    sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %mask = icmp eq <8 x i64> %op1, %op2
  %sel = select <8 x i1> %mask, <8 x i64> %op1, <8 x i64> %op2
  store <8 x i64> %sel, ptr %a
  ret void
}

define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %mask = icmp eq <16 x i64> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x i64> %op1, <16 x i64> %op2
  store <16 x i64> %sel, ptr %a
  ret void
}

define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %mask = icmp eq <32 x i64> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i64> %op1, <32 x i64> %op2
  store <32 x i64> %sel, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }