xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-compares.ll (revision ab7110bcd6b137803935508de8c9f6af377f9454)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5
6target triple = "aarch64-unknown-linux-gnu"
7
8;
9; FCMP OEQ
10;
11
; The operands are only 64 bits wide, so the expected lowering is a single
; NEON fcmeq on v-registers rather than an SVE predicated compare.
define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %is_oeq = fcmp oeq <4 x half> %op1, %op2
  %mask = sext <4 x i1> %is_oeq to <4 x i16>
  ret <4 x i16> %mask
}
22
; The operands are 128 bits wide, so the expected lowering is a single NEON
; fcmeq on v-registers rather than an SVE predicated compare.
define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_oeq = fcmp oeq <8 x half> %op1, %op2
  %mask = sext <8 x i1> %is_oeq to <8 x i16>
  ret <8 x i16> %mask
}
33
; Ordered-equal on <16 x half> with vscale_range(2,0): expect one predicated SVE
; fcmeq under a vl16 ptrue, with the resulting predicate expanded to all-ones
; lanes before it is stored to %c.
define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_oeq = fcmp oeq <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_oeq to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
51
; No vscale_range attribute here, so the expected code depends on the
; -aarch64-sve-vector-bits-min value of each RUN line: the 256-bit run splits
; the <32 x half> compare into two vl16 halves, while the 512-bit and 2048-bit
; runs handle it with a single vl32 operation.
define void @fcmp_oeq_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT:    fcmeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcmp_oeq_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT:    ret
  %lhs = load <32 x half>, ptr %a
  %rhs = load <32 x half>, ptr %b
  %is_oeq = fcmp oeq <32 x half> %lhs, %rhs
  %mask = sext <32 x i1> %is_oeq to <32 x i16>
  store <32 x i16> %mask, ptr %c
  ret void
}
85
; Ordered-equal on <64 x half> with vscale_range(8,0): the whole vector is
; expected to be handled by one predicated fcmeq under a vl64 ptrue.
define void @fcmp_oeq_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <64 x half>, ptr %a
  %rhs = load <64 x half>, ptr %b
  %is_oeq = fcmp oeq <64 x half> %lhs, %rhs
  %mask = sext <64 x i1> %is_oeq to <64 x i16>
  store <64 x i16> %mask, ptr %c
  ret void
}
103
; Ordered-equal on <128 x half> with vscale_range(16,0): the whole vector is
; expected to be handled by one predicated fcmeq under a vl128 ptrue.
define void @fcmp_oeq_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <128 x half>, ptr %a
  %rhs = load <128 x half>, ptr %b
  %is_oeq = fcmp oeq <128 x half> %lhs, %rhs
  %mask = sext <128 x i1> %is_oeq to <128 x i16>
  store <128 x i16> %mask, ptr %c
  ret void
}
121
; The operands are only 64 bits wide, so the expected lowering is a single
; NEON fcmeq on v-registers rather than an SVE predicated compare.
define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %is_oeq = fcmp oeq <2 x float> %op1, %op2
  %mask = sext <2 x i1> %is_oeq to <2 x i32>
  ret <2 x i32> %mask
}
132
; The operands are 128 bits wide, so the expected lowering is a single NEON
; fcmeq on v-registers rather than an SVE predicated compare.
define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %is_oeq = fcmp oeq <4 x float> %op1, %op2
  %mask = sext <4 x i1> %is_oeq to <4 x i32>
  ret <4 x i32> %mask
}
143
; Ordered-equal on <8 x float> with vscale_range(2,0): expect one predicated SVE
; fcmeq under a vl8 ptrue, with the resulting predicate expanded to all-ones
; lanes before it is stored to %c.
define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <8 x float>, ptr %a
  %rhs = load <8 x float>, ptr %b
  %is_oeq = fcmp oeq <8 x float> %lhs, %rhs
  %mask = sext <8 x i1> %is_oeq to <8 x i32>
  store <8 x i32> %mask, ptr %c
  ret void
}
161
; No vscale_range attribute here, so the expected code depends on the
; -aarch64-sve-vector-bits-min value of each RUN line: the 256-bit run splits
; the <16 x float> compare into two vl8 halves, while the 512-bit and 2048-bit
; runs handle it with a single vl16 operation.
define void @fcmp_oeq_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT:    fcmeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcmp_oeq_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT:    ret
  %lhs = load <16 x float>, ptr %a
  %rhs = load <16 x float>, ptr %b
  %is_oeq = fcmp oeq <16 x float> %lhs, %rhs
  %mask = sext <16 x i1> %is_oeq to <16 x i32>
  store <16 x i32> %mask, ptr %c
  ret void
}
195
; Ordered-equal on <32 x float> with vscale_range(8,0): the whole vector is
; expected to be handled by one predicated fcmeq under a vl32 ptrue.
define void @fcmp_oeq_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <32 x float>, ptr %a
  %rhs = load <32 x float>, ptr %b
  %is_oeq = fcmp oeq <32 x float> %lhs, %rhs
  %mask = sext <32 x i1> %is_oeq to <32 x i32>
  store <32 x i32> %mask, ptr %c
  ret void
}
213
; Ordered-equal on <64 x float> with vscale_range(16,0): the whole vector is
; expected to be handled by one predicated fcmeq under a vl64 ptrue.
define void @fcmp_oeq_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <64 x float>, ptr %a
  %rhs = load <64 x float>, ptr %b
  %is_oeq = fcmp oeq <64 x float> %lhs, %rhs
  %mask = sext <64 x i1> %is_oeq to <64 x i32>
  store <64 x i32> %mask, ptr %c
  ret void
}
231
; A single double is only 64 bits wide, so the expected lowering is the
; fcmeq form on d-registers rather than an SVE predicated compare.
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq d0, d0, d1
; CHECK-NEXT:    ret
  %is_oeq = fcmp oeq <1 x double> %op1, %op2
  %mask = sext <1 x i1> %is_oeq to <1 x i64>
  ret <1 x i64> %mask
}
242
; The operands are 128 bits wide, so the expected lowering is a single NEON
; fcmeq on v-registers rather than an SVE predicated compare.
define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %is_oeq = fcmp oeq <2 x double> %op1, %op2
  %mask = sext <2 x i1> %is_oeq to <2 x i64>
  ret <2 x i64> %mask
}
253
; Ordered-equal on <4 x double> with vscale_range(2,0): expect one predicated
; SVE fcmeq under a vl4 ptrue, with the resulting predicate expanded to
; all-ones lanes before it is stored to %c.
define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <4 x double>, ptr %a
  %rhs = load <4 x double>, ptr %b
  %is_oeq = fcmp oeq <4 x double> %lhs, %rhs
  %mask = sext <4 x i1> %is_oeq to <4 x i64>
  store <4 x i64> %mask, ptr %c
  ret void
}
271
; No vscale_range attribute here, so the expected code depends on the
; -aarch64-sve-vector-bits-min value of each RUN line: the 256-bit run splits
; the <8 x double> compare into two vl4 halves, while the 512-bit and 2048-bit
; runs handle it with a single vl8 operation.
define void @fcmp_oeq_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT:    fcmeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcmp_oeq_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT:    ret
  %lhs = load <8 x double>, ptr %a
  %rhs = load <8 x double>, ptr %b
  %is_oeq = fcmp oeq <8 x double> %lhs, %rhs
  %mask = sext <8 x i1> %is_oeq to <8 x i64>
  store <8 x i64> %mask, ptr %c
  ret void
}
305
; Ordered-equal on <16 x double> with vscale_range(8,0): the whole vector is
; expected to be handled by one predicated fcmeq under a vl16 ptrue.
define void @fcmp_oeq_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x double>, ptr %a
  %rhs = load <16 x double>, ptr %b
  %is_oeq = fcmp oeq <16 x double> %lhs, %rhs
  %mask = sext <16 x i1> %is_oeq to <16 x i64>
  store <16 x i64> %mask, ptr %c
  ret void
}
323
; Ordered-equal on <32 x double> with vscale_range(16,0): the whole vector is
; expected to be handled by one predicated fcmeq under a vl32 ptrue.
define void @fcmp_oeq_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <32 x double>, ptr %a
  %rhs = load <32 x double>, ptr %b
  %is_oeq = fcmp oeq <32 x double> %lhs, %rhs
  %mask = sext <32 x i1> %is_oeq to <32 x i64>
  store <32 x i64> %mask, ptr %c
  ret void
}
341
342;
343; FCMP UEQ
344;
345
; Unordered-or-equal on <16 x half>: the expected code computes an fcmuo
; predicate and an fcmeq predicate and merges them with a predicated
; predicate mov before expanding the result to all-ones lanes.
define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ueq_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmuo p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    fcmeq p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ueq = fcmp ueq <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ueq to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
365
366;
367; FCMP ONE
368;
369
; Ordered-not-equal on <16 x half>: the expected code issues fcmgt in both
; operand orders and merges the two predicates with a predicated predicate
; mov before expanding the result to all-ones lanes.
define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_one_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_one = fcmp one <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_one to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
389
390;
391; FCMP UNE
392;
393
; Unordered-or-not-equal on <16 x half>: expected to map directly onto a
; single predicated fcmne.
define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_une_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_une = fcmp une <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_une to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
411
412;
413; FCMP OGT
414;
415
; Ordered-greater-than on <16 x half>: expected to map directly onto a single
; predicated fcmgt with the operands in source order.
define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ogt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ogt = fcmp ogt <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ogt to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
433
434;
435; FCMP UGT
436;
437
; Unordered-or-greater-than on <16 x half>: the expected code computes the
; opposite ordered compare (fcmge with the operands swapped) and then flips
; every bit of the expanded mask with an eor against all-ones.
define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ugt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ugt = fcmp ugt <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ugt to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
457
458;
459; FCMP OLT
460;
461
; Ordered-less-than on <16 x half>: expected to be expressed as a single
; predicated fcmgt with the two operands swapped.
define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_olt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_olt = fcmp olt <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_olt to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
479
480;
481; FCMP ULT
482;
483
; Unordered-or-less-than on <16 x half>: the expected code computes the
; opposite ordered compare (fcmge in source operand order) and then flips
; every bit of the expanded mask with an eor against all-ones.
define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ult_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ult = fcmp ult <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ult to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
503
504;
505; FCMP OGE
506;
507
; Ordered-greater-or-equal on <16 x half>: expected to map directly onto a
; single predicated fcmge with the operands in source order.
define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oge_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_oge = fcmp oge <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_oge to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
525
526;
527; FCMP UGE
528;
529
; Unordered-or-greater-or-equal on <16 x half>: the expected code computes the
; opposite ordered compare (fcmgt with the operands swapped) and then flips
; every bit of the expanded mask with an eor against all-ones.
define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_uge_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_uge = fcmp uge <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_uge to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
549
550;
551; FCMP OLE
552;
553
; Ordered-less-or-equal on <16 x half>: expected to be expressed as a single
; predicated fcmge with the two operands swapped.
define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ole_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ole = fcmp ole <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ole to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
571
572;
573; FCMP ULE
574;
575
; Unordered-or-less-or-equal on <16 x half>: the expected code computes the
; opposite ordered compare (fcmgt in source operand order) and then flips
; every bit of the expanded mask with an eor against all-ones.
define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ule_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ule = fcmp ule <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ule to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
595
596;
597; FCMP UNO
598;
599
; Unordered test on <16 x half>: expected to map directly onto a single
; predicated fcmuo.
define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_uno_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmuo p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_uno = fcmp uno <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_uno to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
617
618;
619; FCMP ORD
620;
621
; Ordered test on <16 x half>: the expected code computes the unordered
; predicate with fcmuo and then flips every bit of the expanded mask with an
; eor against all-ones.
define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ord_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmuo p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ord = fcmp ord <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ord to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
641
642;
643; FCMP EQ
644;
645
; Equality with the 'fast' flag set on the fcmp: expected to lower to a single
; predicated fcmeq.
define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_eq_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_eq = fcmp fast oeq <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_eq to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
663
664;
665; FCMP NE
666;
667
; Inequality written as 'fcmp fast one': with the fast flag the expected code
; is a single predicated fcmne.
define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ne_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ne = fcmp fast one <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ne to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
685
686;
687; FCMP GT
688;
689
; Greater-than with the 'fast' flag set on the fcmp: expected to lower to a
; single predicated fcmgt with the operands in source order.
define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_gt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_gt = fcmp fast ogt <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_gt to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
707
708;
709; FCMP LT
710;
711
; Less-than with the 'fast' flag set on the fcmp: expected to lower to a
; single predicated fcmgt with the two operands swapped.
define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_lt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_lt = fcmp fast olt <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_lt to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
729
730;
731; FCMP GE
732;
733
; Greater-or-equal with the 'fast' flag set on the fcmp: expected to lower to
; a single predicated fcmge with the operands in source order.
define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ge_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_ge = fcmp fast oge <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_ge to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
751
752;
753; FCMP LE
754;
755
; Less-or-equal with the 'fast' flag set on the fcmp: expected to lower to a
; single predicated fcmge with the two operands swapped.
define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_le_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %is_le = fcmp fast ole <16 x half> %lhs, %rhs
  %mask = sext <16 x i1> %is_le to <16 x i16>
  store <16 x i16> %mask, ptr %c
  ret void
}
773
774attributes #0 = { "target-features"="+sve" }
775