; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-select.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
3
; Scalar-condition select on <vscale x 1 x i8>: returns %a when %cond is true,
; otherwise %b. The CHECK lines expect bit 0 of the condition to be
; sign-extended (sbfx) to 0 or -1, turned into an all-false / all-true
; predicate by whilelo counting from xzr, and applied with a .b sel.
define <vscale x 1 x i8> @select_nxv1i8(i1 %cond, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b) {
; CHECK-LABEL: select_nxv1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b
  ret <vscale x 1 x i8> %res
}
15
; Scalar-condition select on <vscale x 16 x i8>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .b predicate, and sel chooses between z0 and z1.
define <vscale x 16 x i8> @select_nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: select_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
  ret <vscale x 16 x i8> %res
}
27
; Scalar-condition select on <vscale x 1 x i16>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .h predicate, and sel chooses between z0 and z1.
define <vscale x 1 x i16> @select_nxv1i16(i1 %cond, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b) {
; CHECK-LABEL: select_nxv1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b
  ret <vscale x 1 x i16> %res
}
39
; Scalar-condition select on <vscale x 8 x i16>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .h predicate, and sel chooses between z0 and z1.
define <vscale x 8 x i16> @select_nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: select_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
  ret <vscale x 8 x i16> %res
}
51
; Scalar-condition select on <vscale x 1 x i32>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .s predicate, and sel chooses between z0 and z1.
define <vscale x 1 x i32> @select_nxv1i32(i1 %cond, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b) {
; CHECK-LABEL: select_nxv1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b
  ret <vscale x 1 x i32> %res
}
63
; Scalar-condition select on <vscale x 4 x i32>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .s predicate, and sel chooses between z0 and z1.
define <vscale x 4 x i32> @select_nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: select_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
  ret <vscale x 4 x i32> %res
}
75
; Scalar-condition select on <vscale x 1 x i64>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .d predicate, and sel chooses between z0 and z1.
define <vscale x 1 x i64> @select_nxv1i64(i1 %cond, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b) {
; CHECK-LABEL: select_nxv1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b
  ret <vscale x 1 x i64> %res
}
87
; Scalar-condition select on <vscale x 2 x i64>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .d predicate, and sel chooses between z0 and z1.
define <vscale x 2 x i64> @select_nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: select_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %res
}
99
; Scalar-condition select on <vscale x 8 x half>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .h predicate, and sel chooses between z0 and z1.
define <vscale x 8 x half> @select_nxv8f16(i1 %cond, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: select_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 8 x half> %a, <vscale x 8 x half> %b
  ret <vscale x 8 x half> %res
}
111
; Scalar-condition select on <vscale x 4 x float>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .s predicate, and sel chooses between z0 and z1.
define <vscale x 4 x float> @select_nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b
  ret <vscale x 4 x float> %res
}
123
; Scalar-condition select on <vscale x 2 x double>: %a if %cond, else %b.
; Expected code: sbfx widens the condition bit to 0/-1, whilelo from xzr turns
; it into a .d predicate, and sel chooses between z0 and z1.
define <vscale x 2 x double> @select_nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: select_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b
  ret <vscale x 2 x double> %res
}
135
; Scalar-condition select between two <vscale x 16 x i1> predicates:
; %a if %cond, else %b. Expected code: the condition is widened with sbfx,
; made into a governing predicate p2 by whilelo (.b), and a predicate sel
; chooses between p0 and p1.
define <vscale x 16 x i1> @select_nxv16i1(i1 %cond, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: select_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.b, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
  ret <vscale x 16 x i1> %res
}
147
; Scalar-condition select between two <vscale x 8 x i1> predicates:
; %a if %cond, else %b. Expected code: the condition is widened with sbfx,
; made into a governing predicate p2 by whilelo (.h), and a predicate sel
; chooses between p0 and p1.
define <vscale x 8 x i1> @select_nxv8i1(i1 %cond, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: select_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.h, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
  ret <vscale x 8 x i1> %res
}
159
; Scalar-condition select between two <vscale x 4 x i1> predicates:
; %a if %cond, else %b. Expected code: the condition is widened with sbfx,
; made into a governing predicate p2 by whilelo (.s), and a predicate sel
; chooses between p0 and p1.
define <vscale x 4 x i1> @select_nxv4i1(i1 %cond, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: select_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.s, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
  ret <vscale x 4 x i1> %res
}
171
; Scalar-condition select between two <vscale x 2 x i1> predicates:
; %a if %cond, else %b. Expected code: the condition is widened with sbfx,
; made into a governing predicate p2 by whilelo (.d), and a predicate sel
; chooses between p0 and p1.
define <vscale x 2 x i1> @select_nxv2i1(i1 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: select_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
  ret <vscale x 2 x i1> %res
}
183
; Scalar-condition select between two <vscale x 1 x i1> predicates:
; %a if %cond, else %b. Compared with the nxv2i1 case, the CHECK lines expect
; one extra step: the .d whilelo result in p2 is passed through punpklo before
; it governs the predicate sel.
define <vscale x 1 x i1> @select_nxv1i1(i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
; CHECK-LABEL: select_nxv1i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    punpklo p2.h, p2.b
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
  ret <vscale x 1 x i1> %res
}
196
; Integer vector select
198
; Vector-predicated select on <vscale x 16 x i8>: lanes where %p is true take
; %a, the others keep %dst. The CHECK lines expect a single merging mov of z1
; into z0 under p0 (.b lanes).
define <vscale x 16 x i8> @sel_nxv16i8(<vscale x 16 x i1> %p, <vscale x 16 x i8> %dst, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sel_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %a, <vscale x 16 x i8> %dst
  ret <vscale x 16 x i8> %sel
}
207
; Vector-predicated select on <vscale x 8 x i16>: lanes where %p is true take
; %a, the others keep %dst. The CHECK lines expect a single merging mov of z1
; into z0 under p0 (.h lanes).
define <vscale x 8 x i16> @sel_nxv8i16(<vscale x 8 x i1> %p, <vscale x 8 x i16> %dst, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sel_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %a, <vscale x 8 x i16> %dst
  ret <vscale x 8 x i16> %sel
}
216
; Vector-predicated select on <vscale x 4 x i32>: lanes where %p is true take
; %a, the others keep %dst. The CHECK lines expect a single merging mov of z1
; into z0 under p0 (.s lanes).
define <vscale x 4 x i32> @sel_nxv4i32(<vscale x 4 x i1> %p, <vscale x 4 x i32> %dst, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sel_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dst
  ret <vscale x 4 x i32> %sel
}
225
; Vector-predicated select on <vscale x 1 x i64> with a <vscale x 1 x i1>
; predicate: lanes where %p is true take %a, the others keep %dst. The CHECK
; lines expect the predicate to be rearranged with uzp1 (.d) before the
; merging mov of z1 into z0.
define <vscale x 1 x i64> @sel_nxv1i64(<vscale x 1 x i1> %p, <vscale x 1 x i64> %dst, <vscale x 1 x i64> %a) {
; CHECK-LABEL: sel_nxv1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.d, p0.d, p0.d
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 1 x i1> %p, <vscale x 1 x i64> %a, <vscale x 1 x i64> %dst
  ret <vscale x 1 x i64> %sel
}
235
; Vector-predicated select on <vscale x 2 x i64>: lanes where %p is true take
; %a, the others keep %dst. The CHECK lines expect a single merging mov of z1
; into z0 under p0 (.d lanes).
define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p, <vscale x 2 x i64> %dst, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sel_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %a, <vscale x 2 x i64> %dst
  ret <vscale x 2 x i64> %sel
}
244
; Floating point vector select
246
; Vector-predicated select on <vscale x 8 x half>: lanes where %p is true take
; %a, the others keep %dst. The CHECK lines expect a single merging mov of z1
; into z0 under p0 (.h lanes).
define <vscale x 8 x half> @sel_nxv8f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %dst, <vscale x 8 x half> %a) {
; CHECK-LABEL: sel_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %dst
  ret <vscale x 8 x half> %sel
}
255
; Vector-predicated select on <vscale x 4 x float>: lanes where %p is true
; take %a, the others keep %dst. The CHECK lines expect a single merging mov
; of z1 into z0 under p0 (.s lanes).
define <vscale x 4 x float> @sel_nxv4f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %dst, <vscale x 4 x float> %a) {
; CHECK-LABEL: sel_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %dst
  ret <vscale x 4 x float> %sel
}
264
; Vector-predicated select on <vscale x 2 x float> (two floats per granule):
; lanes where %p is true take %a, the others keep %dst. The CHECK lines expect
; the merging mov to operate on .d lanes.
define <vscale x 2 x float> @sel_nxv2f32(<vscale x 2 x i1> %p, <vscale x 2 x float> %dst, <vscale x 2 x float> %a) {
; CHECK-LABEL: sel_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> %a, <vscale x 2 x float> %dst
  ret <vscale x 2 x float> %sel
}
273
; Vector-predicated select on <vscale x 2 x double>: lanes where %p is true
; take %a, the others keep %dst. The CHECK lines expect a single merging mov
; of z1 into z0 under p0 (.d lanes).
; NOTE: the function name says "nxv8f64" but every type in the body is
; <vscale x 2 x double>; the name is kept so the CHECK-LABEL stays stable.
define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %dst, <vscale x 2 x double> %a) {
; CHECK-LABEL: sel_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %dst
  ret <vscale x 2 x double> %sel
}
282
; Check icmp+select
284
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 2 x half>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .d lanes.
define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x half> %a, <vscale x 2 x half> %b
  ret <vscale x 2 x half> %sel
}
298
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 2 x float>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .d lanes.
define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x float> %a, <vscale x 2 x float> %b
  ret <vscale x 2 x float> %sel
}
312
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 2 x double>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .d lanes.
define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %b
  ret <vscale x 2 x double> %sel
}
326
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 4 x half>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .s lanes.
define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x half> %a, <vscale x 4 x half> %b
  ret <vscale x 4 x half> %sel
}
340
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 4 x float>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .s lanes.
define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x float> %a, <vscale x 4 x float> %b
  ret <vscale x 4 x float> %sel
}
354
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 8 x half>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .h lanes.
define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 8 x half> %a, <vscale x 8 x half> %b
  ret <vscale x 8 x half> %sel
}
368
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 1 x i64>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .d lanes.
define <vscale x 1 x i64> @icmp_select_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b
  ret <vscale x 1 x i64> %sel
}
382
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 2 x i64>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .d lanes.
define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %sel
}
396
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 1 x i32>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .s lanes.
define <vscale x 1 x i32> @icmp_select_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b
  ret <vscale x 1 x i32> %sel
}
410
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 4 x i32>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .s lanes.
define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
  ret <vscale x 4 x i32> %sel
}
424
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 1 x i16>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .h lanes.
define <vscale x 1 x i16> @icmp_select_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b
  ret <vscale x 1 x i16> %sel
}
438
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 8 x i16>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .h lanes.
define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
  ret <vscale x 8 x i16> %sel
}
452
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 1 x i8>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .b lanes.
define <vscale x 1 x i8> @icmp_select_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b
  ret <vscale x 1 x i8> %sel
}
466
; Select on a scalar compare: %a when %x0 == 0, otherwise %b
; (<vscale x 16 x i8>). Expected code: cmp/cset materialise the i1, sbfx
; widens it to 0/-1, and whilelo + sel apply it on .b lanes.
define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
  ret <vscale x 16 x i8> %sel
}
480
; Select between two <vscale x 1 x i1> predicates on a scalar compare:
; %a when %x0 == 0, otherwise %b. Expected code: cmp/cset/sbfx build the
; 0/-1 condition, whilelo (.d) plus punpklo produce the governing predicate
; p2, and a predicate sel chooses between p0 and p1.
define <vscale x 1 x i1> @icmp_select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    punpklo p2.h, p2.b
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
  ret <vscale x 1 x i1> %sel
}
495
; Select between two <vscale x 2 x i1> predicates on a scalar compare:
; %a when %x0 == 0, otherwise %b. Expected code: cmp/cset/sbfx build the
; 0/-1 condition, whilelo (.d) produces the governing predicate p2, and a
; predicate sel chooses between p0 and p1.
define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
  ret <vscale x 2 x i1> %sel
}
; Select between two <vscale x 4 x i1> predicates on a scalar compare:
; %a when %x0 == 0, otherwise %b. Expected code: cmp/cset/sbfx build the
; 0/-1 condition, whilelo (.s) produces the governing predicate p2, and a
; predicate sel chooses between p0 and p1.
define <vscale x 4 x i1> @icmp_select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.s, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
  ret <vscale x 4 x i1> %sel
}
; Select between two <vscale x 8 x i1> predicates on a scalar compare:
; %a when %x0 == 0, otherwise %b. Expected code: cmp/cset/sbfx build the
; 0/-1 condition, whilelo (.h) produces the governing predicate p2, and a
; predicate sel chooses between p0 and p1.
define <vscale x 8 x i1> @icmp_select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.h, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
  ret <vscale x 8 x i1> %sel
}
; Select between two <vscale x 16 x i1> predicates on a scalar compare:
; %a when %x0 == 0, otherwise %b. Expected code: cmp/cset/sbfx build the
; 0/-1 condition, whilelo (.b) produces the governing predicate p2, and a
; predicate sel chooses between p0 and p1.
define <vscale x 16 x i1> @icmp_select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.b, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
  ret <vscale x 16 x i1> %sel
}
548
; select(%a oeq 0.0, %a, %a * %b): the arithmetic result sits on the FALSE arm
; of the select. The CHECK lines expect the compare to be emitted as fcmne
; (rather than fcmeq) so that the fmul can be predicated directly into z0
; (p0/m) with no separate select instruction.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file — confirm whether a definition is intended.
define <vscale x 4 x float> @select_f32_invert_fmul(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: select_f32_invert_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fmul = fmul <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fmul
  ret <vscale x 4 x float> %sel
}
561
; select(%a oeq 0.0, %a, %a + %b): the arithmetic result sits on the FALSE arm
; of the select. The CHECK lines expect the compare to be emitted as fcmne
; (rather than fcmeq) so that the fadd can be predicated directly into z0
; (p0/m) with no separate select instruction.
define <vscale x 4 x float> @select_f32_invert_fadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_f32_invert_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fadd = fadd <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fadd
  ret <vscale x 4 x float> %sel
}
574
; select(%c == 0, %a, %a - %b) where the condition is an INTEGER compare on a
; separate operand %c. The CHECK lines expect the compare to be emitted as
; cmpne (rather than cmpeq) so that the fsub can be predicated directly into
; z0 (p0/m) with no separate select instruction.
define <vscale x 4 x float> @select_f32_invert_fsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: select_f32_invert_fsub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = icmp eq <vscale x 4 x i32> %c, zeroinitializer
  %fsub = fsub <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fsub
  ret <vscale x 4 x float> %sel
}
587
; select(%a oeq 0.0, %a * %b, %a): the arithmetic result is already on the
; TRUE arm, so the CHECK lines expect the compare to stay as fcmeq and the
; fmul to be predicated on it directly.
define <vscale x 4 x float> @select_f32_no_invert_op_lhs(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_f32_no_invert_op_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fmul = fmul <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %fmul, <vscale x 4 x float> %a
  ret <vscale x 4 x float> %sel
}
600
; select(%a oeq 0.0, %a * %b, %c * %d): both arms are arithmetic results.
; The CHECK lines expect the compare to stay as fcmeq, both fmuls to be
; emitted unpredicated, and the result to be combined with a merging mov.
define <vscale x 4 x float> @select_f32_no_invert_2_op(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) {
; CHECK-LABEL: select_f32_no_invert_2_op:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmul z1.s, z0.s, z1.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmul z0.s, z2.s, z3.s
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fmul1 = fmul <vscale x 4 x float> %a, %b
  %fmul2 = fmul <vscale x 4 x float> %c, %d
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %fmul1, <vscale x 4 x float> %fmul2
  ret <vscale x 4 x float> %sel
}
616
; select(%m oeq 0.0, %m, %m) with %m = %a * %b: both arms are the same value.
; The CHECK lines expect only the unpredicated fmul to remain — no compare
; and no select.
define <vscale x 4 x float> @select_f32_no_invert_equal_ops(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_f32_no_invert_equal_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %m = fmul <vscale x 4 x float> %a, %b
  %p = fcmp oeq <vscale x 4 x float> %m, zeroinitializer
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %m, <vscale x 4 x float> %m
  ret <vscale x 4 x float> %sel
}
627
; The compare result %p feeds two selects: select(%p, %a, %a + %b) and then
; select(%p, %c, <first select>). The CHECK lines expect the compare to stay
; as fcmeq, the fadd to be emitted unpredicated, and an explicit sel followed
; by a merging mov.
; NOTE: the name mentions "fmul" but the body uses fadd, and the i32 %len
; parameter is never read in the body.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file — confirm whether a definition is intended.
define <vscale x 4 x float> @select_f32_no_invert_fmul_two_setcc_uses(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, i32 %len) #0 {
; CHECK-LABEL: select_f32_no_invert_fmul_two_setcc_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fadd z1.s, z0.s, z1.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p0/m, z2.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fadd = fadd <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fadd
  %sel2 = select <vscale x 4 x i1> %p, <vscale x 4 x float> %c, <vscale x 4 x float> %sel
  ret <vscale x 4 x float> %sel2
}
643
; Fixed-width counterpart, select(%a oeq 0.0, %a, %a * %b) on <4 x float>.
; The CHECK lines expect the compare to stay as fcmeq on v registers, an
; unpredicated fmul, and a bif to merge the result.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file — confirm whether a definition is intended.
define <4 x float> @select_f32_no_invert_not_scalable(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: select_f32_no_invert_not_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT:    fmul v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %p = fcmp oeq <4 x float> %a, zeroinitializer
  %fmul = fmul <4 x float> %a, %b
  %sel = select <4 x i1> %p, <4 x float> %a, <4 x float> %fmul
  ret <4 x float> %sel
}
656