xref: /llvm-project/llvm/test/CodeGen/AArch64/vecreduce-bool.ll (revision c2bd5c25b3634e55089d34afe922aa38eee743e2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
3
4declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
5declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
6declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
7declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
8declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
9declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
10
11declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a)
12declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
13declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
14declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
15declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
16declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
17
18declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %a)
19declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
20declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
21declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
22declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
23declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
24
; AND-reduction of the "lane < 0" mask of a <1 x i8> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i8> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
38
; AND-reduction of the "lane < 0" mask of a <2 x i8> vector; %a1 is returned
; only when both lanes compare negative, otherwise %a2.
define i32 @reduce_and_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i8> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
55
; AND-reduction of the "lane < 0" mask of a <4 x i8> vector; %a1 is returned
; only when all four lanes compare negative, otherwise %a2.
define i32 @reduce_and_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i8> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
72
; AND-reduction of the "lane < 0" mask of a <8 x i8> vector; %a1 is returned
; only when every lane compares negative, otherwise %a2.
define i32 @reduce_and_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    uminv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i8> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
87
; AND-reduction of the "lane < 0" mask of a <16 x i8> vector; %a1 is returned
; only when every lane compares negative, otherwise %a2.
define i32 @reduce_and_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <16 x i8> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
102
; AND-reduction of the "lane < 0" mask of a <32 x i8> vector; %a1 is returned
; only when every lane compares negative, otherwise %a2. The expected output
; combines the two input registers (v0, v1) before the compare.
define i32 @reduce_and_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <32 x i8> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
118
; AND-reduction of the "lane < 0" mask of a <1 x i16> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i16> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
132
; AND-reduction of the "lane < 0" mask of a <2 x i16> vector; %a1 is returned
; only when both lanes compare negative, otherwise %a2.
define i32 @reduce_and_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i16> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
149
; AND-reduction of the "lane < 0" mask of a <4 x i16> vector; %a1 is returned
; only when all four lanes compare negative, otherwise %a2.
define i32 @reduce_and_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i16> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
164
; AND-reduction of the "lane < 0" mask of a <8 x i16> vector; %a1 is returned
; only when every lane compares negative, otherwise %a2.
define i32 @reduce_and_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i16> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
179
; AND-reduction of the "lane < 0" mask of a <16 x i16> vector; %a1 is returned
; only when every lane compares negative, otherwise %a2. The expected output
; compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_and_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <16 x i16> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
196
; AND-reduction of the "lane < 0" mask of a <1 x i32> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_and_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i32> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
210
; AND-reduction of the "lane < 0" mask of a <2 x i32> vector; %a1 is returned
; only when both lanes compare negative, otherwise %a2.
define i32 @reduce_and_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i32> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
225
; AND-reduction of the "lane < 0" mask of a <4 x i32> vector; %a1 is returned
; only when all four lanes compare negative, otherwise %a2.
define i32 @reduce_and_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i32> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
240
; AND-reduction of the "lane < 0" mask of a <8 x i32> vector; %a1 is returned
; only when every lane compares negative, otherwise %a2. The expected output
; compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_and_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i32> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
257
; AND-reduction of the "lane < 0" mask of a <1 x i64> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_and_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i64> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
271
; AND-reduction of the "lane < 0" mask of a <2 x i64> vector; %a1 is returned
; only when both lanes compare negative, otherwise %a2.
define i32 @reduce_and_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i64> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
286
; AND-reduction of the "lane < 0" mask of a <4 x i64> vector; %a1 is returned
; only when all four lanes compare negative, otherwise %a2. The expected
; output compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_and_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.2d, v1.2d, #0
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i64> %a0, zeroinitializer
  %all = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %neg)
  %sel = select i1 %all, i32 %a1, i32 %a2
  ret i32 %sel
}
303
; OR-reduction of the "lane < 0" mask of a <1 x i8> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i8> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
317
; OR-reduction of the "lane < 0" mask of a <2 x i8> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i8> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
334
; OR-reduction of the "lane < 0" mask of a <4 x i8> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i8> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
351
; OR-reduction of the "lane < 0" mask of a <8 x i8> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i8> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
366
; OR-reduction of the "lane < 0" mask of a <16 x i8> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <16 x i8> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
381
; OR-reduction of the "lane < 0" mask of a <32 x i8> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2. The expected output
; combines the two input registers (v0, v1) before the compare.
define i32 @reduce_or_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <32 x i8> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
397
; OR-reduction of the "lane < 0" mask of a <1 x i16> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i16> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
411
; OR-reduction of the "lane < 0" mask of a <2 x i16> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i16> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
428
; OR-reduction of the "lane < 0" mask of a <4 x i16> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i16> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
443
; OR-reduction of the "lane < 0" mask of a <8 x i16> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i16> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
458
; OR-reduction of the "lane < 0" mask of a <16 x i16> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2. The expected output
; compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_or_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <16 x i16> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
475
; OR-reduction of the "lane < 0" mask of a <1 x i32> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_or_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i32> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
489
; OR-reduction of the "lane < 0" mask of a <2 x i32> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i32> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
504
; OR-reduction of the "lane < 0" mask of a <4 x i32> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i32> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
519
; OR-reduction of the "lane < 0" mask of a <8 x i32> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2. The expected output
; compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_or_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i32> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
536
; OR-reduction of the "lane < 0" mask of a <1 x i64> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_or_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i64> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
550
; OR-reduction of the "lane < 0" mask of a <2 x i64> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2.
define i32 @reduce_or_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i64> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
565
; OR-reduction of the "lane < 0" mask of a <4 x i64> vector; %a1 is returned
; when at least one lane compares negative, otherwise %a2. The expected output
; compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_or_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.2d, v1.2d, #0
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i64> %a0, zeroinitializer
  %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %neg)
  %sel = select i1 %any, i32 %a1, i32 %a2
  ret i32 %sel
}
582
; XOR-reduction of the "lane < 0" mask of a <1 x i8> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i8> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
596
; XOR-reduction of the "lane < 0" mask of a <2 x i8> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i8> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
613
; XOR-reduction of the "lane < 0" mask of a <4 x i8> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    addv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i8> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
630
; XOR-reduction of the "lane < 0" mask of a <8 x i8> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    addv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i8> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
645
; XOR-reduction of the "lane < 0" mask of a <16 x i8> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <16 x i8> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
660
; XOR-reduction of the "lane < 0" mask of a <32 x i8> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2. The expected
; output compares both input registers (v0, v1) and then combines the masks.
define i32 @reduce_xor_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <32 x i8> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
677
; XOR-reduction of the "lane < 0" mask of a <1 x i16> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i16> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
691
; XOR-reduction of the "lane < 0" mask of a <2 x i16> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i16> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
708
; XOR-reduction of the "lane < 0" mask of a <4 x i16> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    addv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i16> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
723
; XOR-reduction of the "lane < 0" mask of a <8 x i16> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    addv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i16> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
738
; XOR-reduction of the "lane < 0" mask of a <16 x i16> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2. The expected
; output compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_xor_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <16 x i16> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
755
; XOR-reduction of the "lane < 0" mask of a <1 x i32> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_xor_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i32> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
769
; XOR-reduction of the "lane < 0" mask of a <2 x i32> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i32> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
784
; XOR-reduction of the "lane < 0" mask of a <4 x i32> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i32> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
799
; XOR-reduction of the "lane < 0" mask of a <8 x i32> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2. The expected
; output compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_xor_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    addv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <8 x i32> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
816
; XOR-reduction of the "lane < 0" mask of a <1 x i64> vector; the i1 result
; picks %a1 (true) or %a2 (false).
define i32 @reduce_xor_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %neg = icmp slt <1 x i64> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
830
; XOR-reduction of the "lane < 0" mask of a <2 x i64> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2.
define i32 @reduce_xor_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    addp d0, v0.2d
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <2 x i64> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
845
; XOR-reduction of the "lane < 0" mask of a <4 x i64> vector; %a1 is returned
; when an odd number of lanes compare negative, otherwise %a2. The expected
; output compares both input registers (v0, v1) and packs the masks with uzp1.
define i32 @reduce_xor_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.2d, v1.2d, #0
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %neg = icmp slt <4 x i64> %a0, zeroinitializer
  %odd = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %neg)
  %sel = select i1 %odd, i32 %a1, i32 %a2
  ret i32 %sel
}
862