; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4h, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2
  ret <4 x half> %sel
}
20
; Don't use SVE for 128-bit vectors.
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8h, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2
  ret <8 x half> %sel
}
33
define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl16
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x half>, ptr %a
  %op2 = load volatile <16 x half>, ptr %b
  %sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2
  store <16 x half> %sel, ptr %a
  ret void
}
53
define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.h, w2
; VBITS_GE_256-NEXT:    ptrue p0.h
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ptrue p1.h, vl16
; VBITS_GE_256-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_256-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.h, p0, z0.h, z2.h
; VBITS_GE_256-NEXT:    sel z1.h, p0, z1.h, z3.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p1, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.h, w2
; VBITS_GE_512-NEXT:    ptrue p0.h
; VBITS_GE_512-NEXT:    ptrue p1.h, vl32
; VBITS_GE_512-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_512-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.h, p0, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <32 x half>, ptr %a
  %op2 = load volatile <32 x half>, ptr %b
  %sel = select i1 %mask, <32 x half> %op1, <32 x half> %op2
  store <32 x half> %sel, ptr %a
  ret void
}
91
define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl64
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x half>, ptr %a
  %op2 = load volatile <64 x half>, ptr %b
  %sel = select i1 %mask, <64 x half> %op1, <64 x half> %op2
  store <64 x half> %sel, ptr %a
  ret void
}
111
define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl128
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x half>, ptr %a
  %op2 = load volatile <128 x half>, ptr %b
  %sel = select i1 %mask, <128 x half> %op1, <128 x half> %op2
  store <128 x half> %sel, ptr %a
  ret void
}
131
; Don't use SVE for 64-bit vectors.
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.2s, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2
  ret <2 x float> %sel
}
144
; Don't use SVE for 128-bit vectors.
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2
  ret <4 x float> %sel
}
157
define void @select_v8f32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl8
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <8 x float>, ptr %a
  %op2 = load volatile <8 x float>, ptr %b
  %sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2
  store <8 x float> %sel, ptr %a
  ret void
}
177
define void @select_v16f32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    and w8, w2, #0x1
; VBITS_GE_256-NEXT:    ptrue p0.s
; VBITS_GE_256-NEXT:    mov z0.s, w8
; VBITS_GE_256-NEXT:    ptrue p1.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.s, p0, z0.s, z2.s
; VBITS_GE_256-NEXT:    sel z1.s, p0, z1.s, z3.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p1, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    and w8, w2, #0x1
; VBITS_GE_512-NEXT:    ptrue p0.s
; VBITS_GE_512-NEXT:    mov z0.s, w8
; VBITS_GE_512-NEXT:    ptrue p1.s, vl16
; VBITS_GE_512-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.s, p0, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <16 x float>, ptr %a
  %op2 = load volatile <16 x float>, ptr %b
  %sel = select i1 %mask, <16 x float> %op1, <16 x float> %op2
  store <16 x float> %sel, ptr %a
  ret void
}
215
define void @select_v32f32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x float>, ptr %a
  %op2 = load volatile <32 x float>, ptr %b
  %sel = select i1 %mask, <32 x float> %op1, <32 x float> %op2
  store <32 x float> %sel, ptr %a
  ret void
}
235
define void @select_v64f32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl64
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x float>, ptr %a
  %op2 = load volatile <64 x float>, ptr %b
  %sel = select i1 %mask, <64 x float> %op1, <64 x float> %op2
  store <64 x float> %sel, ptr %a
  ret void
}
255
; Don't use SVE for 64-bit vectors.
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
  ret <1 x double> %sel
}
268
; Don't use SVE for 128-bit vectors.
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    dup v2.2d, x8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2
  ret <2 x double> %sel
}
281
define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl4
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <4 x double>, ptr %a
  %op2 = load volatile <4 x double>, ptr %b
  %sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2
  store <4 x double> %sel, ptr %a
  ret void
}
302
define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_256-NEXT:    and x8, x2, #0x1
; VBITS_GE_256-NEXT:    ptrue p0.d
; VBITS_GE_256-NEXT:    mov z0.d, x8
; VBITS_GE_256-NEXT:    ptrue p1.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.d, p0, z0.d, z2.d
; VBITS_GE_256-NEXT:    sel z1.d, p0, z1.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p1, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_512-NEXT:    and x8, x2, #0x1
; VBITS_GE_512-NEXT:    ptrue p0.d
; VBITS_GE_512-NEXT:    mov z0.d, x8
; VBITS_GE_512-NEXT:    ptrue p1.d, vl8
; VBITS_GE_512-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.d, p0, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <8 x double>, ptr %a
  %op2 = load volatile <8 x double>, ptr %b
  %sel = select i1 %mask, <8 x double> %op1, <8 x double> %op2
  store <8 x double> %sel, ptr %a
  ret void
}
342
define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl16
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x double>, ptr %a
  %op2 = load volatile <16 x double>, ptr %b
  %sel = select i1 %mask, <16 x double> %op1, <16 x double> %op2
  store <16 x double> %sel, ptr %a
  ret void
}
363
define void @select_v32f64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x double>, ptr %a
  %op2 = load volatile <32 x double>, ptr %b
  %sel = select i1 %mask, <32 x double> %op1, <32 x double> %op2
  store <32 x double> %sel, ptr %a
  ret void
}
384
attributes #0 = { "target-features"="+sve" }
