xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3
; Predicated <4 x i32> add reduction with no extension: lanes of %x are kept
; where the matching lane of %b equals zero and replaced by 0 elsewhere, then
; all lanes are summed into an i32.
; The CHECK lines expect the compare+select to become a vpt.i32 block
; predicating a single vaddvt.u32 (no separate vpsel).
4define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %b) {
5; CHECK-LABEL: add_v4i32_v4i32:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vpt.i32 eq, q1, zr
8; CHECK-NEXT:    vaddvt.u32 r0, q0
9; CHECK-NEXT:    bx lr
10entry:
11  %c = icmp eq <4 x i32> %b, zeroinitializer
12  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
13  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
14  ret i32 %z
15}
16
; Predicated add reduction of <4 x i32> %x zero-extended to i64: lanes are
; kept where %b == 0, zeroed otherwise, and summed into an i64.
; The CHECK lines expect a vpt.i32 block predicating the long reduction
; vaddlvt.u32, which writes the result to the r0/r1 pair.
17define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %b) {
18; CHECK-LABEL: add_v4i32_v4i64_zext:
19; CHECK:       @ %bb.0: @ %entry
20; CHECK-NEXT:    vpt.i32 eq, q1, zr
21; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
22; CHECK-NEXT:    bx lr
23entry:
24  %c = icmp eq <4 x i32> %b, zeroinitializer
25  %xx = zext <4 x i32> %x to <4 x i64>
26  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
27  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
28  ret i64 %z
29}
30
; Signed counterpart of add_v4i32_v4i64_zext: %x is sign-extended to i64
; before the predicated (%b == 0) add reduction.
; The CHECK lines expect a vpt.i32 block predicating vaddlvt.s32 into r0/r1.
31define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %b) {
32; CHECK-LABEL: add_v4i32_v4i64_sext:
33; CHECK:       @ %bb.0: @ %entry
34; CHECK-NEXT:    vpt.i32 eq, q1, zr
35; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
36; CHECK-NEXT:    bx lr
37entry:
38  %c = icmp eq <4 x i32> %b, zeroinitializer
39  %xx = sext <4 x i32> %x to <4 x i64>
40  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
41  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
42  ret i64 %z
43}
44
; Predicated add reduction of <2 x i32> %x zero-extended to i64 (lanes kept
; where %b == 0).
; The CHECK lines expect no predicated reduction instruction here. Instead:
;   - %x is masked with vmov.i64 #0xffffffff / vand,
;   - the predicate is assembled in r1 from s4 and s6 with cmp/csetm/bfi and
;     moved to p0 with vmsr,
;   - vpsel selects between the masked value and zero,
;   - the two 64-bit lanes are added with adds/adcs.
45define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
46; CHECK-LABEL: add_v2i32_v2i64_zext:
47; CHECK:       @ %bb.0: @ %entry
48; CHECK-NEXT:    vmov r0, s4
49; CHECK-NEXT:    movs r1, #0
50; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
51; CHECK-NEXT:    vand q0, q0, q2
52; CHECK-NEXT:    cmp r0, #0
53; CHECK-NEXT:    csetm r0, eq
54; CHECK-NEXT:    bfi r1, r0, #0, #8
55; CHECK-NEXT:    vmov r0, s6
56; CHECK-NEXT:    vmov.i32 q1, #0x0
57; CHECK-NEXT:    cmp r0, #0
58; CHECK-NEXT:    csetm r0, eq
59; CHECK-NEXT:    bfi r1, r0, #8, #8
60; CHECK-NEXT:    vmsr p0, r1
61; CHECK-NEXT:    vpsel q0, q0, q1
62; CHECK-NEXT:    vmov r0, r1, d1
63; CHECK-NEXT:    vmov r2, r3, d0
64; CHECK-NEXT:    adds r0, r0, r2
65; CHECK-NEXT:    adcs r1, r3
66; CHECK-NEXT:    bx lr
67entry:
68  %c = icmp eq <2 x i32> %b, zeroinitializer
69  %xx = zext <2 x i32> %x to <2 x i64>
70  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
71  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
72  ret i64 %z
73}
74
; Signed counterpart of add_v2i32_v2i64_zext: <2 x i32> %x is sign-extended
; to i64 before the predicated (%b == 0) add reduction.
; The CHECK lines expect:
;   - the sign extension done on scalar copies (vmov out of s2/s0, asrs #31
;     for the high words, vmov back into q0),
;   - the predicate built in r1 with cmp/csetm/bfi and moved to p0 with vmsr,
;   - vpsel against zero, then the two 64-bit lanes added with adds/adcs.
75define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %b) {
76; CHECK-LABEL: add_v2i32_v2i64_sext:
77; CHECK:       @ %bb.0: @ %entry
78; CHECK-NEXT:    vmov r0, s2
79; CHECK-NEXT:    vmov r1, s0
80; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
81; CHECK-NEXT:    asrs r0, r0, #31
82; CHECK-NEXT:    asrs r1, r1, #31
83; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
84; CHECK-NEXT:    vmov r0, s4
85; CHECK-NEXT:    movs r1, #0
86; CHECK-NEXT:    cmp r0, #0
87; CHECK-NEXT:    csetm r0, eq
88; CHECK-NEXT:    bfi r1, r0, #0, #8
89; CHECK-NEXT:    vmov r0, s6
90; CHECK-NEXT:    vmov.i32 q1, #0x0
91; CHECK-NEXT:    cmp r0, #0
92; CHECK-NEXT:    csetm r0, eq
93; CHECK-NEXT:    bfi r1, r0, #8, #8
94; CHECK-NEXT:    vmsr p0, r1
95; CHECK-NEXT:    vpsel q0, q0, q1
96; CHECK-NEXT:    vmov r0, r1, d1
97; CHECK-NEXT:    vmov r2, r3, d0
98; CHECK-NEXT:    adds r0, r0, r2
99; CHECK-NEXT:    adcs r1, r3
100; CHECK-NEXT:    bx lr
101entry:
102  %c = icmp eq <2 x i32> %b, zeroinitializer
103  %xx = sext <2 x i32> %x to <2 x i64>
104  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
105  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
106  ret i64 %z
107}
108
; Predicated add reduction of <8 x i16> %x zero-extended to i32 (lanes kept
; where %b == 0), returning i32.
; The CHECK lines expect the extension to be absorbed into the reduction:
; a vpt.i16 block predicating vaddvt.u16.
109define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %b) {
110; CHECK-LABEL: add_v8i16_v8i32_zext:
111; CHECK:       @ %bb.0: @ %entry
112; CHECK-NEXT:    vpt.i16 eq, q1, zr
113; CHECK-NEXT:    vaddvt.u16 r0, q0
114; CHECK-NEXT:    bx lr
115entry:
116  %c = icmp eq <8 x i16> %b, zeroinitializer
117  %xx = zext <8 x i16> %x to <8 x i32>
118  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
119  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
120  ret i32 %z
121}
122
; Signed counterpart of add_v8i16_v8i32_zext: <8 x i16> %x is sign-extended
; to i32 before the predicated (%b == 0) add reduction.
; The CHECK lines expect a vpt.i16 block predicating vaddvt.s16.
123define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %b) {
124; CHECK-LABEL: add_v8i16_v8i32_sext:
125; CHECK:       @ %bb.0: @ %entry
126; CHECK-NEXT:    vpt.i16 eq, q1, zr
127; CHECK-NEXT:    vaddvt.s16 r0, q0
128; CHECK-NEXT:    bx lr
129entry:
130  %c = icmp eq <8 x i16> %b, zeroinitializer
131  %xx = sext <8 x i16> %x to <8 x i32>
132  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
133  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
134  ret i32 %z
135}
136
; Predicated add reduction of <4 x i16> %x zero-extended to i32 (lanes kept
; where %b == 0).
; The CHECK lines expect both operands to be widened with vmovlb.u16 first,
; followed by a vpt.i32 block predicating vaddvt.u32.
137define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %b) {
138; CHECK-LABEL: add_v4i16_v4i32_zext:
139; CHECK:       @ %bb.0: @ %entry
140; CHECK-NEXT:    vmovlb.u16 q1, q1
141; CHECK-NEXT:    vmovlb.u16 q0, q0
142; CHECK-NEXT:    vpt.i32 eq, q1, zr
143; CHECK-NEXT:    vaddvt.u32 r0, q0
144; CHECK-NEXT:    bx lr
145entry:
146  %c = icmp eq <4 x i16> %b, zeroinitializer
147  %xx = zext <4 x i16> %x to <4 x i32>
148  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
149  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
150  ret i32 %z
151}
152
; Signed counterpart of add_v4i16_v4i32_zext: <4 x i16> %x is sign-extended
; to i32 before the predicated (%b == 0) add reduction.
; The CHECK lines expect %x widened with vmovlb.s16 and %b (the compare
; operand) widened with vmovlb.u16, then vpt.i32 predicating vaddvt.u32.
153define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %b) {
154; CHECK-LABEL: add_v4i16_v4i32_sext:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    vmovlb.u16 q1, q1
157; CHECK-NEXT:    vmovlb.s16 q0, q0
158; CHECK-NEXT:    vpt.i32 eq, q1, zr
159; CHECK-NEXT:    vaddvt.u32 r0, q0
160; CHECK-NEXT:    bx lr
161entry:
162  %c = icmp eq <4 x i16> %b, zeroinitializer
163  %xx = sext <4 x i16> %x to <4 x i32>
164  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
165  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
166  ret i32 %z
167}
168
; Predicated <8 x i16> add reduction with no extension (lanes kept where
; %b == 0); the i16 result is returned with the zeroext attribute.
; The CHECK lines expect a vpt.i16 block predicating vaddvt.u16, followed by
; uxth on r0 before returning.
169define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %b) {
170; CHECK-LABEL: add_v8i16_v8i16:
171; CHECK:       @ %bb.0: @ %entry
172; CHECK-NEXT:    vpt.i16 eq, q1, zr
173; CHECK-NEXT:    vaddvt.u16 r0, q0
174; CHECK-NEXT:    uxth r0, r0
175; CHECK-NEXT:    bx lr
176entry:
177  %c = icmp eq <8 x i16> %b, zeroinitializer
178  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
179  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
180  ret i16 %z
181}
182
; Predicated add reduction of <8 x i16> %x zero-extended to i64 (lanes kept
; where %b == 0), returning i64.
; The CHECK lines expect a fully expanded sequence rather than a single
; predicated reduction instruction:
;   - d8-d15 are saved and restored with vpush/vpop,
;   - the i16 compare result is materialised as a 0x00/0xff vector with vpsel
;     and re-compared at i32 width (vcmp.i32 ne) to form per-pair predicates,
;   - each pair of %x lanes is extracted with vmov.u16, masked with
;     vmov.i64 #0xffff / vand, and selected against zero with vpsel,
;   - the 64-bit lanes are accumulated into r0/r1 with adds/adcs; the very
;     first pair is combined with add/orrs instead.
; NOTE(review): the add/orrs form presumably relies on the zero-extended
; 16-bit lanes having zero high words and no possible carry; confirm before
; treating it as significant.
183define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %b) {
184; CHECK-LABEL: add_v8i16_v8i64_zext:
185; CHECK:       @ %bb.0: @ %entry
186; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
187; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
188; CHECK-NEXT:    vmov.i8 q3, #0x0
189; CHECK-NEXT:    vmov.i8 q4, #0xff
190; CHECK-NEXT:    vcmp.i16 eq, q1, zr
191; CHECK-NEXT:    vpsel q5, q4, q3
192; CHECK-NEXT:    vmov.u16 r0, q5[2]
193; CHECK-NEXT:    vmov.u16 r1, q5[0]
194; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
195; CHECK-NEXT:    vmov.u16 r0, q5[3]
196; CHECK-NEXT:    vmov.u16 r1, q5[1]
197; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
198; CHECK-NEXT:    vcmp.i32 ne, q1, zr
199; CHECK-NEXT:    vpsel q6, q4, q3
200; CHECK-NEXT:    vmov r0, r1, d12
201; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
202; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
203; CHECK-NEXT:    vmov.u16 r0, q0[1]
204; CHECK-NEXT:    vmov.u16 r1, q0[0]
205; CHECK-NEXT:    vcmp.i32 ne, q1, zr
206; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
207; CHECK-NEXT:    vmov.i64 q1, #0xffff
208; CHECK-NEXT:    vand q7, q2, q1
209; CHECK-NEXT:    vmov.i32 q2, #0x0
210; CHECK-NEXT:    vpsel q7, q7, q2
211; CHECK-NEXT:    vmov r0, r1, d15
212; CHECK-NEXT:    vmov r2, r3, d14
213; CHECK-NEXT:    orrs r1, r3
214; CHECK-NEXT:    add r0, r2
215; CHECK-NEXT:    vmov r2, r3, d13
216; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
217; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
218; CHECK-NEXT:    vmov.u16 r2, q0[3]
219; CHECK-NEXT:    vmov.u16 r3, q0[2]
220; CHECK-NEXT:    vcmp.i32 ne, q6, zr
221; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
222; CHECK-NEXT:    vand q6, q6, q1
223; CHECK-NEXT:    vpsel q6, q6, q2
224; CHECK-NEXT:    vmov r2, r3, d12
225; CHECK-NEXT:    adds r0, r0, r2
226; CHECK-NEXT:    adcs r1, r3
227; CHECK-NEXT:    vmov r2, r3, d13
228; CHECK-NEXT:    adds r0, r0, r2
229; CHECK-NEXT:    vmov.u16 r2, q5[6]
230; CHECK-NEXT:    adcs r1, r3
231; CHECK-NEXT:    vmov.u16 r3, q5[4]
232; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
233; CHECK-NEXT:    vmov.u16 r2, q5[7]
234; CHECK-NEXT:    vmov.u16 r3, q5[5]
235; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
236; CHECK-NEXT:    vcmp.i32 ne, q6, zr
237; CHECK-NEXT:    vpsel q3, q4, q3
238; CHECK-NEXT:    vmov r2, r3, d6
239; CHECK-NEXT:    vmov q4[2], q4[0], r2, r3
240; CHECK-NEXT:    vmov q4[3], q4[1], r2, r3
241; CHECK-NEXT:    vmov.u16 r2, q0[5]
242; CHECK-NEXT:    vmov.u16 r3, q0[4]
243; CHECK-NEXT:    vcmp.i32 ne, q4, zr
244; CHECK-NEXT:    vmov q4[2], q4[0], r3, r2
245; CHECK-NEXT:    vand q4, q4, q1
246; CHECK-NEXT:    vpsel q4, q4, q2
247; CHECK-NEXT:    vmov r2, r3, d8
248; CHECK-NEXT:    adds r0, r0, r2
249; CHECK-NEXT:    adcs r1, r3
250; CHECK-NEXT:    vmov r2, r3, d9
251; CHECK-NEXT:    adds r0, r0, r2
252; CHECK-NEXT:    adcs r1, r3
253; CHECK-NEXT:    vmov r2, r3, d7
254; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
255; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
256; CHECK-NEXT:    vmov.u16 r2, q0[7]
257; CHECK-NEXT:    vmov.u16 r3, q0[6]
258; CHECK-NEXT:    vcmp.i32 ne, q3, zr
259; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
260; CHECK-NEXT:    vand q0, q0, q1
261; CHECK-NEXT:    vpsel q0, q0, q2
262; CHECK-NEXT:    vmov r2, r3, d0
263; CHECK-NEXT:    adds r0, r0, r2
264; CHECK-NEXT:    adcs r1, r3
265; CHECK-NEXT:    vmov r2, r3, d1
266; CHECK-NEXT:    adds r0, r0, r2
267; CHECK-NEXT:    adcs r1, r3
268; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
269; CHECK-NEXT:    bx lr
270entry:
271  %c = icmp eq <8 x i16> %b, zeroinitializer
272  %xx = zext <8 x i16> %x to <8 x i64>
273  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
274  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
275  ret i64 %z
276}
277
; Signed counterpart of add_v8i16_v8i64_zext: <8 x i16> %x is sign-extended
; to i64 before the predicated (%b == 0) add reduction.
; The CHECK lines expect a fully expanded sequence:
;   - d8-d13 are saved and restored with vpush/vpop,
;   - the i16 compare result is materialised as a 0x00/0xff vector with vpsel
;     and re-compared at i32 width (vcmp.i32 ne) to form per-pair predicates,
;   - each pair of %x lanes is extracted with vmov.s16, the high words are
;     produced with asrs #31, and the pair is selected against zero with vpsel,
;   - every 64-bit lane is accumulated into r0/r1 with adds/adcs.
278define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
279; CHECK-LABEL: add_v8i16_v8i64_sext:
280; CHECK:       @ %bb.0: @ %entry
281; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
282; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
283; CHECK-NEXT:    vmov.i8 q2, #0x0
284; CHECK-NEXT:    vmov.i8 q3, #0xff
285; CHECK-NEXT:    vcmp.i16 eq, q1, zr
286; CHECK-NEXT:    vpsel q4, q3, q2
287; CHECK-NEXT:    vmov.u16 r0, q4[2]
288; CHECK-NEXT:    vmov.u16 r1, q4[0]
289; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
290; CHECK-NEXT:    vmov.u16 r0, q4[3]
291; CHECK-NEXT:    vmov.u16 r1, q4[1]
292; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
293; CHECK-NEXT:    vcmp.i32 ne, q1, zr
294; CHECK-NEXT:    vpsel q5, q3, q2
295; CHECK-NEXT:    vmov r0, r1, d10
296; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
297; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
298; CHECK-NEXT:    vmov.s16 r0, q0[1]
299; CHECK-NEXT:    vmov.s16 r1, q0[0]
300; CHECK-NEXT:    vcmp.i32 ne, q1, zr
301; CHECK-NEXT:    vmov q6[2], q6[0], r1, r0
302; CHECK-NEXT:    asrs r0, r0, #31
303; CHECK-NEXT:    asrs r1, r1, #31
304; CHECK-NEXT:    vmov.i32 q1, #0x0
305; CHECK-NEXT:    vmov q6[3], q6[1], r1, r0
306; CHECK-NEXT:    vpsel q6, q6, q1
307; CHECK-NEXT:    vmov r0, r1, d13
308; CHECK-NEXT:    vmov r2, r3, d12
309; CHECK-NEXT:    adds r0, r0, r2
310; CHECK-NEXT:    adcs r1, r3
311; CHECK-NEXT:    vmov r2, r3, d11
312; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
313; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
314; CHECK-NEXT:    vmov.s16 r2, q0[3]
315; CHECK-NEXT:    vmov.s16 r3, q0[2]
316; CHECK-NEXT:    vcmp.i32 ne, q5, zr
317; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
318; CHECK-NEXT:    asrs r2, r2, #31
319; CHECK-NEXT:    asrs r3, r3, #31
320; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
321; CHECK-NEXT:    vpsel q5, q5, q1
322; CHECK-NEXT:    vmov r2, r3, d10
323; CHECK-NEXT:    adds r0, r0, r2
324; CHECK-NEXT:    adcs r1, r3
325; CHECK-NEXT:    vmov r2, r3, d11
326; CHECK-NEXT:    adds r0, r0, r2
327; CHECK-NEXT:    vmov.u16 r2, q4[6]
328; CHECK-NEXT:    adcs r1, r3
329; CHECK-NEXT:    vmov.u16 r3, q4[4]
330; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
331; CHECK-NEXT:    vmov.u16 r2, q4[7]
332; CHECK-NEXT:    vmov.u16 r3, q4[5]
333; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
334; CHECK-NEXT:    vcmp.i32 ne, q5, zr
335; CHECK-NEXT:    vpsel q2, q3, q2
336; CHECK-NEXT:    vmov r2, r3, d4
337; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
338; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
339; CHECK-NEXT:    vmov.s16 r2, q0[5]
340; CHECK-NEXT:    vmov.s16 r3, q0[4]
341; CHECK-NEXT:    vcmp.i32 ne, q3, zr
342; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
343; CHECK-NEXT:    asrs r2, r2, #31
344; CHECK-NEXT:    asrs r3, r3, #31
345; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
346; CHECK-NEXT:    vpsel q3, q3, q1
347; CHECK-NEXT:    vmov r2, r3, d6
348; CHECK-NEXT:    adds r0, r0, r2
349; CHECK-NEXT:    adcs r1, r3
350; CHECK-NEXT:    vmov r2, r3, d7
351; CHECK-NEXT:    adds r0, r0, r2
352; CHECK-NEXT:    adcs r1, r3
353; CHECK-NEXT:    vmov r2, r3, d5
354; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
355; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
356; CHECK-NEXT:    vmov.s16 r2, q0[7]
357; CHECK-NEXT:    vmov.s16 r3, q0[6]
358; CHECK-NEXT:    vcmp.i32 ne, q2, zr
359; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
360; CHECK-NEXT:    asrs r2, r2, #31
361; CHECK-NEXT:    asrs r3, r3, #31
362; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
363; CHECK-NEXT:    vpsel q0, q0, q1
364; CHECK-NEXT:    vmov r2, r3, d0
365; CHECK-NEXT:    adds r0, r0, r2
366; CHECK-NEXT:    adcs r1, r3
367; CHECK-NEXT:    vmov r2, r3, d1
368; CHECK-NEXT:    adds r0, r0, r2
369; CHECK-NEXT:    adcs r1, r3
370; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
371; CHECK-NEXT:    bx lr
372entry:
373  %c = icmp eq <8 x i16> %b, zeroinitializer
374  %xx = sext <8 x i16> %x to <8 x i64>
375  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
376  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
377  ret i64 %z
378}
379
; Predicated add reduction of <4 x i16> %x zero-extended to i64 (lanes kept
; where %b == 0).
; The CHECK lines expect both operands widened with vmovlb.u16, followed by a
; vpt.i32 block predicating the long reduction vaddlvt.u32 into r0/r1.
380define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %b) {
381; CHECK-LABEL: add_v4i16_v4i64_zext:
382; CHECK:       @ %bb.0: @ %entry
383; CHECK-NEXT:    vmovlb.u16 q1, q1
384; CHECK-NEXT:    vmovlb.u16 q0, q0
385; CHECK-NEXT:    vpt.i32 eq, q1, zr
386; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
387; CHECK-NEXT:    bx lr
388entry:
389  %c = icmp eq <4 x i16> %b, zeroinitializer
390  %xx = zext <4 x i16> %x to <4 x i64>
391  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
392  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
393  ret i64 %z
394}
395
; Signed counterpart of add_v4i16_v4i64_zext: <4 x i16> %x is sign-extended
; to i64 before the predicated (%b == 0) add reduction.
; The CHECK lines expect %x widened with vmovlb.s16 and %b with vmovlb.u16,
; followed by a vpt.i32 block predicating vaddlvt.s32 into r0/r1.
396define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %b) {
397; CHECK-LABEL: add_v4i16_v4i64_sext:
398; CHECK:       @ %bb.0: @ %entry
399; CHECK-NEXT:    vmovlb.u16 q1, q1
400; CHECK-NEXT:    vmovlb.s16 q0, q0
401; CHECK-NEXT:    vpt.i32 eq, q1, zr
402; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
403; CHECK-NEXT:    bx lr
404entry:
405  %c = icmp eq <4 x i16> %b, zeroinitializer
406  %xx = sext <4 x i16> %x to <4 x i64>
407  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
408  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
409  ret i64 %z
410}
411
; Predicated add reduction of <2 x i16> %x zero-extended to i64 (lanes kept
; where %b == 0).
; The CHECK lines expect:
;   - both %b and %x masked with vmov.i64 #0xffff / vand,
;   - the predicate built in r1 from s4 and s6 with cmp/csetm/bfi and moved
;     to p0 with vmsr,
;   - vpsel against zero, then the two 64-bit lanes combined with add/orrs
;     (not adds/adcs).
; NOTE(review): add/orrs presumably works because the masked 16-bit lanes
; have zero high words and cannot carry; confirm if this matters.
412define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
413; CHECK-LABEL: add_v2i16_v2i64_zext:
414; CHECK:       @ %bb.0: @ %entry
415; CHECK-NEXT:    vmov.i64 q2, #0xffff
416; CHECK-NEXT:    movs r1, #0
417; CHECK-NEXT:    vand q1, q1, q2
418; CHECK-NEXT:    vand q0, q0, q2
419; CHECK-NEXT:    vmov r0, s4
420; CHECK-NEXT:    cmp r0, #0
421; CHECK-NEXT:    csetm r0, eq
422; CHECK-NEXT:    bfi r1, r0, #0, #8
423; CHECK-NEXT:    vmov r0, s6
424; CHECK-NEXT:    vmov.i32 q1, #0x0
425; CHECK-NEXT:    cmp r0, #0
426; CHECK-NEXT:    csetm r0, eq
427; CHECK-NEXT:    bfi r1, r0, #8, #8
428; CHECK-NEXT:    vmsr p0, r1
429; CHECK-NEXT:    vpsel q0, q0, q1
430; CHECK-NEXT:    vmov r0, r1, d1
431; CHECK-NEXT:    vmov r2, r3, d0
432; CHECK-NEXT:    add r0, r2
433; CHECK-NEXT:    orrs r1, r3
434; CHECK-NEXT:    bx lr
435entry:
436  %c = icmp eq <2 x i16> %b, zeroinitializer
437  %xx = zext <2 x i16> %x to <2 x i64>
438  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
439  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
440  ret i64 %z
441}
442
; Signed counterpart of add_v2i16_v2i64_zext: <2 x i16> %x is sign-extended
; to i64 before the predicated (%b == 0) add reduction.
; The CHECK lines expect:
;   - %b masked with vmov.i32 #0xffff / vand before the scalar compares,
;   - the predicate built in r1 with cmp/csetm/bfi and moved to p0 with vmsr,
;   - %x sign-extended on scalar copies (sxth for the low words, asrs #31 for
;     the high words) and moved back into q0,
;   - vpsel against zero, then the two 64-bit lanes added with adds/adcs.
443define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %b) {
444; CHECK-LABEL: add_v2i16_v2i64_sext:
445; CHECK:       @ %bb.0: @ %entry
446; CHECK-NEXT:    vmov.i32 q2, #0xffff
447; CHECK-NEXT:    movs r1, #0
448; CHECK-NEXT:    vand q1, q1, q2
449; CHECK-NEXT:    vmov r0, s4
450; CHECK-NEXT:    cmp r0, #0
451; CHECK-NEXT:    csetm r0, eq
452; CHECK-NEXT:    bfi r1, r0, #0, #8
453; CHECK-NEXT:    vmov r0, s6
454; CHECK-NEXT:    vmov.i32 q1, #0x0
455; CHECK-NEXT:    cmp r0, #0
456; CHECK-NEXT:    csetm r0, eq
457; CHECK-NEXT:    bfi r1, r0, #8, #8
458; CHECK-NEXT:    vmov r0, s2
459; CHECK-NEXT:    vmsr p0, r1
460; CHECK-NEXT:    vmov r1, s0
461; CHECK-NEXT:    sxth r0, r0
462; CHECK-NEXT:    sxth r1, r1
463; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
464; CHECK-NEXT:    asrs r0, r0, #31
465; CHECK-NEXT:    asrs r1, r1, #31
466; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
467; CHECK-NEXT:    vpsel q0, q0, q1
468; CHECK-NEXT:    vmov r0, r1, d1
469; CHECK-NEXT:    vmov r2, r3, d0
470; CHECK-NEXT:    adds r0, r0, r2
471; CHECK-NEXT:    adcs r1, r3
472; CHECK-NEXT:    bx lr
473entry:
474  %c = icmp eq <2 x i16> %b, zeroinitializer
475  %xx = sext <2 x i16> %x to <2 x i64>
476  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
477  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
478  ret i64 %z
479}
480
; Predicated add reduction of <16 x i8> %x zero-extended to i32 (lanes kept
; where %b == 0), returning i32.
; The CHECK lines expect the extension to be absorbed into the reduction:
; a vpt.i8 block predicating vaddvt.u8.
481define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %b) {
482; CHECK-LABEL: add_v16i8_v16i32_zext:
483; CHECK:       @ %bb.0: @ %entry
484; CHECK-NEXT:    vpt.i8 eq, q1, zr
485; CHECK-NEXT:    vaddvt.u8 r0, q0
486; CHECK-NEXT:    bx lr
487entry:
488  %c = icmp eq <16 x i8> %b, zeroinitializer
489  %xx = zext <16 x i8> %x to <16 x i32>
490  %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
491  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
492  ret i32 %z
493}
494
; Signed counterpart of add_v16i8_v16i32_zext: <16 x i8> %x is sign-extended
; to i32 before the predicated (%b == 0) add reduction.
; The CHECK lines expect a vpt.i8 block predicating vaddvt.s8.
495define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %b) {
496; CHECK-LABEL: add_v16i8_v16i32_sext:
497; CHECK:       @ %bb.0: @ %entry
498; CHECK-NEXT:    vpt.i8 eq, q1, zr
499; CHECK-NEXT:    vaddvt.s8 r0, q0
500; CHECK-NEXT:    bx lr
501entry:
502  %c = icmp eq <16 x i8> %b, zeroinitializer
503  %xx = sext <16 x i8> %x to <16 x i32>
504  %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
505  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
506  ret i32 %z
507}
508
; Predicated add reduction of <8 x i8> %x zero-extended to i32 (lanes kept
; where %b == 0).
; The CHECK lines expect both operands widened to 16-bit lanes with
; vmovlb.u8, followed by a vpt.i16 block predicating vaddvt.u16.
509define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %b) {
510; CHECK-LABEL: add_v8i8_v8i32_zext:
511; CHECK:       @ %bb.0: @ %entry
512; CHECK-NEXT:    vmovlb.u8 q1, q1
513; CHECK-NEXT:    vmovlb.u8 q0, q0
514; CHECK-NEXT:    vpt.i16 eq, q1, zr
515; CHECK-NEXT:    vaddvt.u16 r0, q0
516; CHECK-NEXT:    bx lr
517entry:
518  %c = icmp eq <8 x i8> %b, zeroinitializer
519  %xx = zext <8 x i8> %x to <8 x i32>
520  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
521  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
522  ret i32 %z
523}
524
; Signed counterpart of add_v8i8_v8i32_zext: <8 x i8> %x is sign-extended to
; i32 before the predicated (%b == 0) add reduction.
; The CHECK lines expect %x widened with vmovlb.s8 and %b with vmovlb.u8,
; followed by a vpt.i16 block predicating vaddvt.s16.
525define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %b) {
526; CHECK-LABEL: add_v8i8_v8i32_sext:
527; CHECK:       @ %bb.0: @ %entry
528; CHECK-NEXT:    vmovlb.u8 q1, q1
529; CHECK-NEXT:    vmovlb.s8 q0, q0
530; CHECK-NEXT:    vpt.i16 eq, q1, zr
531; CHECK-NEXT:    vaddvt.s16 r0, q0
532; CHECK-NEXT:    bx lr
533entry:
534  %c = icmp eq <8 x i8> %b, zeroinitializer
535  %xx = sext <8 x i8> %x to <8 x i32>
536  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
537  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
538  ret i32 %z
539}
540
; Predicated add reduction of <4 x i8> %x zero-extended to i32 (lanes kept
; where %b == 0).
; The CHECK lines expect both operands masked to their low byte with
; vmov.i32 #0xff / vand, followed by a vpt.i32 block predicating vaddvt.u32.
541define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %b) {
542; CHECK-LABEL: add_v4i8_v4i32_zext:
543; CHECK:       @ %bb.0: @ %entry
544; CHECK-NEXT:    vmov.i32 q2, #0xff
545; CHECK-NEXT:    vand q1, q1, q2
546; CHECK-NEXT:    vand q0, q0, q2
547; CHECK-NEXT:    vpt.i32 eq, q1, zr
548; CHECK-NEXT:    vaddvt.u32 r0, q0
549; CHECK-NEXT:    bx lr
550entry:
551  %c = icmp eq <4 x i8> %b, zeroinitializer
552  %xx = zext <4 x i8> %x to <4 x i32>
553  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
554  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
555  ret i32 %z
556}
557
; Signed counterpart of add_v4i8_v4i32_zext: <4 x i8> %x is sign-extended to
; i32 before the predicated (%b == 0) add reduction.
; The CHECK lines expect %x sign-extended in two steps (vmovlb.s8 then
; vmovlb.s16), %b masked with vmov.i32 #0xff / vand, and then a vpt.i32 block
; predicating vaddvt.u32.
558define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %b) {
559; CHECK-LABEL: add_v4i8_v4i32_sext:
560; CHECK:       @ %bb.0: @ %entry
561; CHECK-NEXT:    vmov.i32 q2, #0xff
562; CHECK-NEXT:    vmovlb.s8 q0, q0
563; CHECK-NEXT:    vand q1, q1, q2
564; CHECK-NEXT:    vmovlb.s16 q0, q0
565; CHECK-NEXT:    vpt.i32 eq, q1, zr
566; CHECK-NEXT:    vaddvt.u32 r0, q0
567; CHECK-NEXT:    bx lr
568entry:
569  %c = icmp eq <4 x i8> %b, zeroinitializer
570  %xx = sext <4 x i8> %x to <4 x i32>
571  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
572  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
573  ret i32 %z
574}
575
; Predicated add reduction of <16 x i8> %x zero-extended to i16 (lanes kept
; where %b == 0); the i16 result is returned with the zeroext attribute.
; The CHECK lines expect a vpt.i8 block predicating vaddvt.u8, followed by
; uxth on r0 before returning.
576define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %b) {
577; CHECK-LABEL: add_v16i8_v16i16_zext:
578; CHECK:       @ %bb.0: @ %entry
579; CHECK-NEXT:    vpt.i8 eq, q1, zr
580; CHECK-NEXT:    vaddvt.u8 r0, q0
581; CHECK-NEXT:    uxth r0, r0
582; CHECK-NEXT:    bx lr
583entry:
584  %c = icmp eq <16 x i8> %b, zeroinitializer
585  %xx = zext <16 x i8> %x to <16 x i16>
586  %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
587  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
588  ret i16 %z
589}
590
; Signed counterpart of add_v16i8_v16i16_zext: <16 x i8> %x is sign-extended
; to i16 before the predicated (%b == 0) add reduction; the i16 result is
; returned with the signext attribute.
; The CHECK lines expect a vpt.i8 block predicating vaddvt.s8, followed by
; sxth on r0 before returning.
591define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %b) {
592; CHECK-LABEL: add_v16i8_v16i16_sext:
593; CHECK:       @ %bb.0: @ %entry
594; CHECK-NEXT:    vpt.i8 eq, q1, zr
595; CHECK-NEXT:    vaddvt.s8 r0, q0
596; CHECK-NEXT:    sxth r0, r0
597; CHECK-NEXT:    bx lr
598entry:
599  %c = icmp eq <16 x i8> %b, zeroinitializer
600  %xx = sext <16 x i8> %x to <16 x i16>
601  %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
602  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
603  ret i16 %z
604}
605
; Predicated add reduction of <8 x i8> %x zero-extended to i16 (lanes kept
; where %b == 0); the i16 result is returned with the zeroext attribute.
; The CHECK lines expect both operands widened with vmovlb.u8, a vpt.i16
; block predicating vaddvt.u16, and uxth on r0 before returning.
606define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %b) {
607; CHECK-LABEL: add_v8i8_v8i16_zext:
608; CHECK:       @ %bb.0: @ %entry
609; CHECK-NEXT:    vmovlb.u8 q1, q1
610; CHECK-NEXT:    vmovlb.u8 q0, q0
611; CHECK-NEXT:    vpt.i16 eq, q1, zr
612; CHECK-NEXT:    vaddvt.u16 r0, q0
613; CHECK-NEXT:    uxth r0, r0
614; CHECK-NEXT:    bx lr
615entry:
616  %c = icmp eq <8 x i8> %b, zeroinitializer
617  %xx = zext <8 x i8> %x to <8 x i16>
618  %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
619  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
620  ret i16 %z
621}
622
; Signed counterpart of add_v8i8_v8i16_zext: <8 x i8> %x is sign-extended to
; i16 before the predicated (%b == 0) add reduction; the i16 result is
; returned with the signext attribute.
; The CHECK lines expect %x widened with vmovlb.s8 and %b with vmovlb.u8, a
; vpt.i16 block predicating vaddvt.u16, and sxth on r0 before returning.
623define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %b) {
624; CHECK-LABEL: add_v8i8_v8i16_sext:
625; CHECK:       @ %bb.0: @ %entry
626; CHECK-NEXT:    vmovlb.u8 q1, q1
627; CHECK-NEXT:    vmovlb.s8 q0, q0
628; CHECK-NEXT:    vpt.i16 eq, q1, zr
629; CHECK-NEXT:    vaddvt.u16 r0, q0
630; CHECK-NEXT:    sxth r0, r0
631; CHECK-NEXT:    bx lr
632entry:
633  %c = icmp eq <8 x i8> %b, zeroinitializer
634  %xx = sext <8 x i8> %x to <8 x i16>
635  %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
636  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
637  ret i16 %z
638}
639
; Predicated <16 x i8> add reduction with no extension (lanes kept where
; %b == 0); the i8 result is returned with the zeroext attribute.
; The CHECK lines expect a vpt.i8 block predicating vaddvt.u8, followed by
; uxtb on r0 before returning.
640define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %b) {
641; CHECK-LABEL: add_v16i8_v16i8:
642; CHECK:       @ %bb.0: @ %entry
643; CHECK-NEXT:    vpt.i8 eq, q1, zr
644; CHECK-NEXT:    vaddvt.u8 r0, q0
645; CHECK-NEXT:    uxtb r0, r0
646; CHECK-NEXT:    bx lr
647entry:
648  %c = icmp eq <16 x i8> %b, zeroinitializer
649  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
650  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
651  ret i8 %z
652}
653
; Predicated add reduction of <16 x i8> %x zero-extended to i64 (lanes kept
; where %b == 0), returning i64.
; The CHECK lines expect a fully expanded sequence rather than a single
; predicated reduction instruction:
;   - d8-d15 are saved/restored with vpush/vpop and 16 bytes of stack are
;     reserved; the all-zero vector is spilled there (vstrw.32) and reloaded
;     into q7 later (vldrw.u32),
;   - the i8 compare result is materialised as a 0x00/0xff vector with vpsel,
;     then widened in stages: bytes are copied into 16-bit lanes and
;     re-compared (vcmp.i16 ne), then into 32-bit lanes (vcmp.i32 ne), to form
;     per-pair predicates,
;   - each pair of %x bytes is extracted with vmov.u8, masked with
;     vmov.i64 #0xff / vand, and selected against zero with vpsel,
;   - the 64-bit lanes are accumulated into r0/r1 with adds/adcs; the very
;     first pair is combined with add/orrs instead.
; NOTE(review): the add/orrs form presumably relies on the zero-extended
; 8-bit lanes having zero high words and no possible carry; confirm before
; treating it as significant.
654define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %b) {
655; CHECK-LABEL: add_v16i8_v16i64_zext:
656; CHECK:       @ %bb.0: @ %entry
657; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
658; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
659; CHECK-NEXT:    .pad #16
660; CHECK-NEXT:    sub sp, #16
661; CHECK-NEXT:    vmov q2, q0
662; CHECK-NEXT:    vcmp.i8 eq, q1, zr
663; CHECK-NEXT:    vmov.i8 q0, #0x0
664; CHECK-NEXT:    vmov.i8 q1, #0xff
665; CHECK-NEXT:    vpsel q5, q1, q0
666; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
667; CHECK-NEXT:    vmov.u8 r0, q5[0]
668; CHECK-NEXT:    vmov.16 q3[0], r0
669; CHECK-NEXT:    vmov.u8 r0, q5[1]
670; CHECK-NEXT:    vmov.16 q3[1], r0
671; CHECK-NEXT:    vmov.u8 r0, q5[2]
672; CHECK-NEXT:    vmov.16 q3[2], r0
673; CHECK-NEXT:    vmov.u8 r0, q5[3]
674; CHECK-NEXT:    vmov.16 q3[3], r0
675; CHECK-NEXT:    vmov.u8 r0, q5[4]
676; CHECK-NEXT:    vmov.16 q3[4], r0
677; CHECK-NEXT:    vmov.u8 r0, q5[5]
678; CHECK-NEXT:    vmov.16 q3[5], r0
679; CHECK-NEXT:    vmov.u8 r0, q5[6]
680; CHECK-NEXT:    vmov.16 q3[6], r0
681; CHECK-NEXT:    vmov.u8 r0, q5[7]
682; CHECK-NEXT:    vmov.16 q3[7], r0
683; CHECK-NEXT:    vcmp.i16 ne, q3, zr
684; CHECK-NEXT:    vpsel q6, q1, q0
685; CHECK-NEXT:    vmov.u16 r0, q6[2]
686; CHECK-NEXT:    vmov.u16 r1, q6[0]
687; CHECK-NEXT:    vmov q3[2], q3[0], r1, r0
688; CHECK-NEXT:    vmov.u16 r0, q6[3]
689; CHECK-NEXT:    vmov.u16 r1, q6[1]
690; CHECK-NEXT:    vmov q3[3], q3[1], r1, r0
691; CHECK-NEXT:    vcmp.i32 ne, q3, zr
692; CHECK-NEXT:    vpsel q7, q1, q0
693; CHECK-NEXT:    vmov r0, r1, d14
694; CHECK-NEXT:    vmov q3[2], q3[0], r0, r1
695; CHECK-NEXT:    vmov q3[3], q3[1], r0, r1
696; CHECK-NEXT:    vmov.u8 r0, q2[1]
697; CHECK-NEXT:    vmov.u8 r1, q2[0]
698; CHECK-NEXT:    vcmp.i32 ne, q3, zr
699; CHECK-NEXT:    vmov q4[2], q4[0], r1, r0
700; CHECK-NEXT:    vmov.i64 q3, #0xff
701; CHECK-NEXT:    vand q0, q4, q3
702; CHECK-NEXT:    vmov.i32 q4, #0x0
703; CHECK-NEXT:    vpsel q0, q0, q4
704; CHECK-NEXT:    vmov r0, r1, d1
705; CHECK-NEXT:    vmov r2, r3, d0
706; CHECK-NEXT:    orrs r1, r3
707; CHECK-NEXT:    add r0, r2
708; CHECK-NEXT:    vmov r2, r3, d15
709; CHECK-NEXT:    vldrw.u32 q7, [sp] @ 16-byte Reload
710; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
711; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
712; CHECK-NEXT:    vmov.u8 r2, q2[3]
713; CHECK-NEXT:    vmov.u8 r3, q2[2]
714; CHECK-NEXT:    vcmp.i32 ne, q0, zr
715; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
716; CHECK-NEXT:    vand q0, q0, q3
717; CHECK-NEXT:    vpsel q0, q0, q4
718; CHECK-NEXT:    vmov r2, r3, d0
719; CHECK-NEXT:    adds r0, r0, r2
720; CHECK-NEXT:    adcs r1, r3
721; CHECK-NEXT:    vmov r2, r3, d1
722; CHECK-NEXT:    adds r0, r0, r2
723; CHECK-NEXT:    vmov.u16 r2, q6[6]
724; CHECK-NEXT:    adcs r1, r3
725; CHECK-NEXT:    vmov.u16 r3, q6[4]
726; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
727; CHECK-NEXT:    vmov.u16 r2, q6[7]
728; CHECK-NEXT:    vmov.u16 r3, q6[5]
729; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
730; CHECK-NEXT:    vcmp.i32 ne, q0, zr
731; CHECK-NEXT:    vpsel q6, q1, q7
732; CHECK-NEXT:    vmov r2, r3, d12
733; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
734; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
735; CHECK-NEXT:    vmov.u8 r2, q2[5]
736; CHECK-NEXT:    vmov.u8 r3, q2[4]
737; CHECK-NEXT:    vcmp.i32 ne, q0, zr
738; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
739; CHECK-NEXT:    vand q0, q0, q3
740; CHECK-NEXT:    vpsel q0, q0, q4
741; CHECK-NEXT:    vmov r2, r3, d0
742; CHECK-NEXT:    adds r0, r0, r2
743; CHECK-NEXT:    adcs r1, r3
744; CHECK-NEXT:    vmov r2, r3, d1
745; CHECK-NEXT:    adds r0, r0, r2
746; CHECK-NEXT:    adcs r1, r3
747; CHECK-NEXT:    vmov r2, r3, d13
748; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
749; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
750; CHECK-NEXT:    vmov.u8 r2, q2[7]
751; CHECK-NEXT:    vmov.u8 r3, q2[6]
752; CHECK-NEXT:    vcmp.i32 ne, q0, zr
753; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
754; CHECK-NEXT:    vand q0, q0, q3
755; CHECK-NEXT:    vpsel q0, q0, q4
756; CHECK-NEXT:    vmov r2, r3, d0
757; CHECK-NEXT:    adds r0, r0, r2
758; CHECK-NEXT:    adcs r1, r3
759; CHECK-NEXT:    vmov r2, r3, d1
760; CHECK-NEXT:    adds r0, r0, r2
761; CHECK-NEXT:    vmov.u8 r2, q5[8]
762; CHECK-NEXT:    vmov.16 q6[0], r2
763; CHECK-NEXT:    vmov.u8 r2, q5[9]
764; CHECK-NEXT:    vmov.16 q6[1], r2
765; CHECK-NEXT:    vmov.u8 r2, q5[10]
766; CHECK-NEXT:    vmov.16 q6[2], r2
767; CHECK-NEXT:    vmov.u8 r2, q5[11]
768; CHECK-NEXT:    vmov.16 q6[3], r2
769; CHECK-NEXT:    vmov.u8 r2, q5[12]
770; CHECK-NEXT:    vmov.16 q6[4], r2
771; CHECK-NEXT:    vmov.u8 r2, q5[13]
772; CHECK-NEXT:    vmov.16 q6[5], r2
773; CHECK-NEXT:    vmov.u8 r2, q5[14]
774; CHECK-NEXT:    vmov.16 q6[6], r2
775; CHECK-NEXT:    vmov.u8 r2, q5[15]
776; CHECK-NEXT:    vmov.16 q6[7], r2
777; CHECK-NEXT:    adcs r1, r3
778; CHECK-NEXT:    vcmp.i16 ne, q6, zr
779; CHECK-NEXT:    vpsel q5, q1, q7
780; CHECK-NEXT:    vmov.u16 r2, q5[2]
781; CHECK-NEXT:    vmov.u16 r3, q5[0]
782; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
783; CHECK-NEXT:    vmov.u16 r2, q5[3]
784; CHECK-NEXT:    vmov.u16 r3, q5[1]
785; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
786; CHECK-NEXT:    vcmp.i32 ne, q0, zr
787; CHECK-NEXT:    vpsel q6, q1, q7
788; CHECK-NEXT:    vmov r2, r3, d12
789; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
790; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
791; CHECK-NEXT:    vmov.u8 r2, q2[9]
792; CHECK-NEXT:    vmov.u8 r3, q2[8]
793; CHECK-NEXT:    vcmp.i32 ne, q0, zr
794; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
795; CHECK-NEXT:    vand q0, q0, q3
796; CHECK-NEXT:    vpsel q0, q0, q4
797; CHECK-NEXT:    vmov r2, r3, d0
798; CHECK-NEXT:    adds r0, r0, r2
799; CHECK-NEXT:    adcs r1, r3
800; CHECK-NEXT:    vmov r2, r3, d1
801; CHECK-NEXT:    adds r0, r0, r2
802; CHECK-NEXT:    adcs r1, r3
803; CHECK-NEXT:    vmov r2, r3, d13
804; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
805; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
806; CHECK-NEXT:    vmov.u8 r2, q2[11]
807; CHECK-NEXT:    vmov.u8 r3, q2[10]
808; CHECK-NEXT:    vcmp.i32 ne, q0, zr
809; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
810; CHECK-NEXT:    vand q0, q0, q3
811; CHECK-NEXT:    vpsel q0, q0, q4
812; CHECK-NEXT:    vmov r2, r3, d0
813; CHECK-NEXT:    adds r0, r0, r2
814; CHECK-NEXT:    adcs r1, r3
815; CHECK-NEXT:    vmov r2, r3, d1
816; CHECK-NEXT:    adds r0, r0, r2
817; CHECK-NEXT:    vmov.u16 r2, q5[6]
818; CHECK-NEXT:    adcs r1, r3
819; CHECK-NEXT:    vmov.u16 r3, q5[4]
820; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
821; CHECK-NEXT:    vmov.u16 r2, q5[7]
822; CHECK-NEXT:    vmov.u16 r3, q5[5]
823; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
824; CHECK-NEXT:    vcmp.i32 ne, q0, zr
825; CHECK-NEXT:    vpsel q1, q1, q7
826; CHECK-NEXT:    vmov r2, r3, d2
827; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
828; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
829; CHECK-NEXT:    vmov.u8 r2, q2[13]
830; CHECK-NEXT:    vmov.u8 r3, q2[12]
831; CHECK-NEXT:    vcmp.i32 ne, q0, zr
832; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
833; CHECK-NEXT:    vand q0, q0, q3
834; CHECK-NEXT:    vpsel q0, q0, q4
835; CHECK-NEXT:    vmov r2, r3, d0
836; CHECK-NEXT:    adds r0, r0, r2
837; CHECK-NEXT:    adcs r1, r3
838; CHECK-NEXT:    vmov r2, r3, d1
839; CHECK-NEXT:    adds r0, r0, r2
840; CHECK-NEXT:    adcs r1, r3
841; CHECK-NEXT:    vmov r2, r3, d3
842; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
843; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
844; CHECK-NEXT:    vmov.u8 r2, q2[15]
845; CHECK-NEXT:    vmov.u8 r3, q2[14]
846; CHECK-NEXT:    vcmp.i32 ne, q0, zr
847; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
848; CHECK-NEXT:    vand q0, q0, q3
849; CHECK-NEXT:    vpsel q0, q0, q4
850; CHECK-NEXT:    vmov r2, r3, d0
851; CHECK-NEXT:    adds r0, r0, r2
852; CHECK-NEXT:    adcs r1, r3
853; CHECK-NEXT:    vmov r2, r3, d1
854; CHECK-NEXT:    adds r0, r0, r2
855; CHECK-NEXT:    adcs r1, r3
856; CHECK-NEXT:    add sp, #16
857; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
858; CHECK-NEXT:    bx lr
859entry:
860  %c = icmp eq <16 x i8> %b, zeroinitializer
861  %xx = zext <16 x i8> %x to <16 x i64>
862  %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
863  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
864  ret i64 %z
865}
866
; Predicated add reduction, <16 x i8> sign-extended to <16 x i64>.
; Lanes of %x whose matching lane of %b is zero are sign-extended to i64 and
; summed; every other lane contributes 0.
; The expected code uses no vaddv/vaddlv instruction: the i8 predicate is
; widened through i16 and i32 masks with vcmp/vpsel, the bytes of %x are pulled
; out two at a time with vmov.s8 (high words produced by asrs #31), and each
; selected pair is accumulated into r0:r1 with adds/adcs.
867define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
868; CHECK-LABEL: add_v16i8_v16i64_sext:
869; CHECK:       @ %bb.0: @ %entry
870; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
871; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
872; CHECK-NEXT:    vcmp.i8 eq, q1, zr
873; CHECK-NEXT:    vmov.i8 q1, #0x0
874; CHECK-NEXT:    vmov.i8 q2, #0xff
875; CHECK-NEXT:    vpsel q4, q2, q1
876; CHECK-NEXT:    vmov.u8 r0, q4[0]
877; CHECK-NEXT:    vmov.16 q3[0], r0
878; CHECK-NEXT:    vmov.u8 r0, q4[1]
879; CHECK-NEXT:    vmov.16 q3[1], r0
880; CHECK-NEXT:    vmov.u8 r0, q4[2]
881; CHECK-NEXT:    vmov.16 q3[2], r0
882; CHECK-NEXT:    vmov.u8 r0, q4[3]
883; CHECK-NEXT:    vmov.16 q3[3], r0
884; CHECK-NEXT:    vmov.u8 r0, q4[4]
885; CHECK-NEXT:    vmov.16 q3[4], r0
886; CHECK-NEXT:    vmov.u8 r0, q4[5]
887; CHECK-NEXT:    vmov.16 q3[5], r0
888; CHECK-NEXT:    vmov.u8 r0, q4[6]
889; CHECK-NEXT:    vmov.16 q3[6], r0
890; CHECK-NEXT:    vmov.u8 r0, q4[7]
891; CHECK-NEXT:    vmov.16 q3[7], r0
892; CHECK-NEXT:    vcmp.i16 ne, q3, zr
893; CHECK-NEXT:    vpsel q5, q2, q1
894; CHECK-NEXT:    vmov.u16 r0, q5[2]
895; CHECK-NEXT:    vmov.u16 r1, q5[0]
896; CHECK-NEXT:    vmov q3[2], q3[0], r1, r0
897; CHECK-NEXT:    vmov.u16 r0, q5[3]
898; CHECK-NEXT:    vmov.u16 r1, q5[1]
899; CHECK-NEXT:    vmov q3[3], q3[1], r1, r0
900; CHECK-NEXT:    vcmp.i32 ne, q3, zr
901; CHECK-NEXT:    vpsel q6, q2, q1
902; CHECK-NEXT:    vmov r0, r1, d12
903; CHECK-NEXT:    vmov q3[2], q3[0], r0, r1
904; CHECK-NEXT:    vmov q3[3], q3[1], r0, r1
905; CHECK-NEXT:    vmov.s8 r0, q0[1]
906; CHECK-NEXT:    vmov.s8 r1, q0[0]
907; CHECK-NEXT:    vcmp.i32 ne, q3, zr
908; CHECK-NEXT:    vmov q7[2], q7[0], r1, r0
909; CHECK-NEXT:    asrs r0, r0, #31
910; CHECK-NEXT:    asrs r1, r1, #31
911; CHECK-NEXT:    vmov.i32 q3, #0x0
912; CHECK-NEXT:    vmov q7[3], q7[1], r1, r0
913; CHECK-NEXT:    vpsel q7, q7, q3
914; CHECK-NEXT:    vmov r0, r1, d15
915; CHECK-NEXT:    vmov r2, r3, d14
916; CHECK-NEXT:    adds r0, r0, r2
917; CHECK-NEXT:    adcs r1, r3
918; CHECK-NEXT:    vmov r2, r3, d13
919; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
920; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
921; CHECK-NEXT:    vmov.s8 r2, q0[3]
922; CHECK-NEXT:    vmov.s8 r3, q0[2]
923; CHECK-NEXT:    vcmp.i32 ne, q6, zr
924; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
925; CHECK-NEXT:    asrs r2, r2, #31
926; CHECK-NEXT:    asrs r3, r3, #31
927; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
928; CHECK-NEXT:    vpsel q6, q6, q3
929; CHECK-NEXT:    vmov r2, r3, d12
930; CHECK-NEXT:    adds r0, r0, r2
931; CHECK-NEXT:    adcs r1, r3
932; CHECK-NEXT:    vmov r2, r3, d13
933; CHECK-NEXT:    adds r0, r0, r2
934; CHECK-NEXT:    vmov.u16 r2, q5[6]
935; CHECK-NEXT:    adcs r1, r3
936; CHECK-NEXT:    vmov.u16 r3, q5[4]
937; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
938; CHECK-NEXT:    vmov.u16 r2, q5[7]
939; CHECK-NEXT:    vmov.u16 r3, q5[5]
940; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
941; CHECK-NEXT:    vcmp.i32 ne, q6, zr
942; CHECK-NEXT:    vpsel q5, q2, q1
943; CHECK-NEXT:    vmov r2, r3, d10
944; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
945; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
946; CHECK-NEXT:    vmov.s8 r2, q0[5]
947; CHECK-NEXT:    vmov.s8 r3, q0[4]
948; CHECK-NEXT:    vcmp.i32 ne, q6, zr
949; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
950; CHECK-NEXT:    asrs r2, r2, #31
951; CHECK-NEXT:    asrs r3, r3, #31
952; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
953; CHECK-NEXT:    vpsel q6, q6, q3
954; CHECK-NEXT:    vmov r2, r3, d12
955; CHECK-NEXT:    adds r0, r0, r2
956; CHECK-NEXT:    adcs r1, r3
957; CHECK-NEXT:    vmov r2, r3, d13
958; CHECK-NEXT:    adds r0, r0, r2
959; CHECK-NEXT:    adcs r1, r3
960; CHECK-NEXT:    vmov r2, r3, d11
961; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
962; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
963; CHECK-NEXT:    vmov.s8 r2, q0[7]
964; CHECK-NEXT:    vmov.s8 r3, q0[6]
965; CHECK-NEXT:    vcmp.i32 ne, q5, zr
966; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
967; CHECK-NEXT:    asrs r2, r2, #31
968; CHECK-NEXT:    asrs r3, r3, #31
969; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
970; CHECK-NEXT:    vpsel q5, q5, q3
971; CHECK-NEXT:    vmov r2, r3, d10
972; CHECK-NEXT:    adds r0, r0, r2
973; CHECK-NEXT:    adcs r1, r3
974; CHECK-NEXT:    vmov r2, r3, d11
975; CHECK-NEXT:    adds r0, r0, r2
976; CHECK-NEXT:    vmov.u8 r2, q4[8]
977; CHECK-NEXT:    vmov.16 q5[0], r2
978; CHECK-NEXT:    vmov.u8 r2, q4[9]
979; CHECK-NEXT:    vmov.16 q5[1], r2
980; CHECK-NEXT:    vmov.u8 r2, q4[10]
981; CHECK-NEXT:    vmov.16 q5[2], r2
982; CHECK-NEXT:    vmov.u8 r2, q4[11]
983; CHECK-NEXT:    vmov.16 q5[3], r2
984; CHECK-NEXT:    vmov.u8 r2, q4[12]
985; CHECK-NEXT:    vmov.16 q5[4], r2
986; CHECK-NEXT:    vmov.u8 r2, q4[13]
987; CHECK-NEXT:    vmov.16 q5[5], r2
988; CHECK-NEXT:    vmov.u8 r2, q4[14]
989; CHECK-NEXT:    vmov.16 q5[6], r2
990; CHECK-NEXT:    vmov.u8 r2, q4[15]
991; CHECK-NEXT:    vmov.16 q5[7], r2
992; CHECK-NEXT:    adcs r1, r3
993; CHECK-NEXT:    vcmp.i16 ne, q5, zr
994; CHECK-NEXT:    vpsel q4, q2, q1
995; CHECK-NEXT:    vmov.u16 r2, q4[2]
996; CHECK-NEXT:    vmov.u16 r3, q4[0]
997; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
998; CHECK-NEXT:    vmov.u16 r2, q4[3]
999; CHECK-NEXT:    vmov.u16 r3, q4[1]
1000; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1001; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1002; CHECK-NEXT:    vpsel q5, q2, q1
1003; CHECK-NEXT:    vmov r2, r3, d10
1004; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
1005; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
1006; CHECK-NEXT:    vmov.s8 r2, q0[9]
1007; CHECK-NEXT:    vmov.s8 r3, q0[8]
1008; CHECK-NEXT:    vcmp.i32 ne, q6, zr
1009; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1010; CHECK-NEXT:    asrs r2, r2, #31
1011; CHECK-NEXT:    asrs r3, r3, #31
1012; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
1013; CHECK-NEXT:    vpsel q6, q6, q3
1014; CHECK-NEXT:    vmov r2, r3, d12
1015; CHECK-NEXT:    adds r0, r0, r2
1016; CHECK-NEXT:    adcs r1, r3
1017; CHECK-NEXT:    vmov r2, r3, d13
1018; CHECK-NEXT:    adds r0, r0, r2
1019; CHECK-NEXT:    adcs r1, r3
1020; CHECK-NEXT:    vmov r2, r3, d11
1021; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
1022; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
1023; CHECK-NEXT:    vmov.s8 r2, q0[11]
1024; CHECK-NEXT:    vmov.s8 r3, q0[10]
1025; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1026; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
1027; CHECK-NEXT:    asrs r2, r2, #31
1028; CHECK-NEXT:    asrs r3, r3, #31
1029; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1030; CHECK-NEXT:    vpsel q5, q5, q3
1031; CHECK-NEXT:    vmov r2, r3, d10
1032; CHECK-NEXT:    adds r0, r0, r2
1033; CHECK-NEXT:    adcs r1, r3
1034; CHECK-NEXT:    vmov r2, r3, d11
1035; CHECK-NEXT:    adds r0, r0, r2
1036; CHECK-NEXT:    vmov.u16 r2, q4[6]
1037; CHECK-NEXT:    adcs r1, r3
1038; CHECK-NEXT:    vmov.u16 r3, q4[4]
1039; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
1040; CHECK-NEXT:    vmov.u16 r2, q4[7]
1041; CHECK-NEXT:    vmov.u16 r3, q4[5]
1042; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1043; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1044; CHECK-NEXT:    vpsel q1, q2, q1
1045; CHECK-NEXT:    vmov r2, r3, d2
1046; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
1047; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
1048; CHECK-NEXT:    vmov.s8 r2, q0[13]
1049; CHECK-NEXT:    vmov.s8 r3, q0[12]
1050; CHECK-NEXT:    vcmp.i32 ne, q2, zr
1051; CHECK-NEXT:    vmov q2[2], q2[0], r3, r2
1052; CHECK-NEXT:    asrs r2, r2, #31
1053; CHECK-NEXT:    asrs r3, r3, #31
1054; CHECK-NEXT:    vmov q2[3], q2[1], r3, r2
1055; CHECK-NEXT:    vpsel q2, q2, q3
1056; CHECK-NEXT:    vmov r2, r3, d4
1057; CHECK-NEXT:    adds r0, r0, r2
1058; CHECK-NEXT:    adcs r1, r3
1059; CHECK-NEXT:    vmov r2, r3, d5
1060; CHECK-NEXT:    adds r0, r0, r2
1061; CHECK-NEXT:    adcs r1, r3
1062; CHECK-NEXT:    vmov r2, r3, d3
1063; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
1064; CHECK-NEXT:    vmov q1[3], q1[1], r2, r3
1065; CHECK-NEXT:    vmov.s8 r2, q0[15]
1066; CHECK-NEXT:    vmov.s8 r3, q0[14]
1067; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1068; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1069; CHECK-NEXT:    asrs r2, r2, #31
1070; CHECK-NEXT:    asrs r3, r3, #31
1071; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
1072; CHECK-NEXT:    vpsel q0, q0, q3
1073; CHECK-NEXT:    vmov r2, r3, d0
1074; CHECK-NEXT:    adds r0, r0, r2
1075; CHECK-NEXT:    adcs r1, r3
1076; CHECK-NEXT:    vmov r2, r3, d1
1077; CHECK-NEXT:    adds r0, r0, r2
1078; CHECK-NEXT:    adcs r1, r3
1079; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1080; CHECK-NEXT:    bx lr
1081entry:
1082  %c = icmp eq <16 x i8> %b, zeroinitializer
1083  %xx = sext <16 x i8> %x to <16 x i64>
1084  %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
1085  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
1086  ret i64 %z
1087}
1088
; Predicated add reduction, <8 x i8> zero-extended to <8 x i64>.
; Lanes of %x whose matching lane of %b is zero are zero-extended to i64 and
; summed; every other lane contributes 0.
; The expected code widens both operands with vmovlb.u8, widens the 16-bit
; predicate to 32-bit masks via vcmp/vpsel, masks each extracted pair with
; 0xffff (vmov.i64 + vand) and accumulates into r0:r1. The first pair is
; combined with add/orrs, the remaining ones with adds/adcs.
1089define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %b) {
1090; CHECK-LABEL: add_v8i8_v8i64_zext:
1091; CHECK:       @ %bb.0: @ %entry
1092; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
1093; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
1094; CHECK-NEXT:    vmovlb.u8 q1, q1
1095; CHECK-NEXT:    vmov.i8 q3, #0x0
1096; CHECK-NEXT:    vcmp.i16 eq, q1, zr
1097; CHECK-NEXT:    vmov.i8 q4, #0xff
1098; CHECK-NEXT:    vpsel q5, q4, q3
1099; CHECK-NEXT:    vmovlb.u8 q0, q0
1100; CHECK-NEXT:    vmov.u16 r0, q5[2]
1101; CHECK-NEXT:    vmov.u16 r1, q5[0]
1102; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
1103; CHECK-NEXT:    vmov.u16 r0, q5[3]
1104; CHECK-NEXT:    vmov.u16 r1, q5[1]
1105; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
1106; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1107; CHECK-NEXT:    vpsel q6, q4, q3
1108; CHECK-NEXT:    vmov r0, r1, d12
1109; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
1110; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
1111; CHECK-NEXT:    vmov.u16 r0, q0[1]
1112; CHECK-NEXT:    vmov.u16 r1, q0[0]
1113; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1114; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
1115; CHECK-NEXT:    vmov.i64 q1, #0xffff
1116; CHECK-NEXT:    vand q7, q2, q1
1117; CHECK-NEXT:    vmov.i32 q2, #0x0
1118; CHECK-NEXT:    vpsel q7, q7, q2
1119; CHECK-NEXT:    vmov r0, r1, d15
1120; CHECK-NEXT:    vmov r2, r3, d14
1121; CHECK-NEXT:    orrs r1, r3
1122; CHECK-NEXT:    add r0, r2
1123; CHECK-NEXT:    vmov r2, r3, d13
1124; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
1125; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
1126; CHECK-NEXT:    vmov.u16 r2, q0[3]
1127; CHECK-NEXT:    vmov.u16 r3, q0[2]
1128; CHECK-NEXT:    vcmp.i32 ne, q6, zr
1129; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1130; CHECK-NEXT:    vand q6, q6, q1
1131; CHECK-NEXT:    vpsel q6, q6, q2
1132; CHECK-NEXT:    vmov r2, r3, d12
1133; CHECK-NEXT:    adds r0, r0, r2
1134; CHECK-NEXT:    adcs r1, r3
1135; CHECK-NEXT:    vmov r2, r3, d13
1136; CHECK-NEXT:    adds r0, r0, r2
1137; CHECK-NEXT:    vmov.u16 r2, q5[6]
1138; CHECK-NEXT:    adcs r1, r3
1139; CHECK-NEXT:    vmov.u16 r3, q5[4]
1140; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1141; CHECK-NEXT:    vmov.u16 r2, q5[7]
1142; CHECK-NEXT:    vmov.u16 r3, q5[5]
1143; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
1144; CHECK-NEXT:    vcmp.i32 ne, q6, zr
1145; CHECK-NEXT:    vpsel q3, q4, q3
1146; CHECK-NEXT:    vmov r2, r3, d6
1147; CHECK-NEXT:    vmov q4[2], q4[0], r2, r3
1148; CHECK-NEXT:    vmov q4[3], q4[1], r2, r3
1149; CHECK-NEXT:    vmov.u16 r2, q0[5]
1150; CHECK-NEXT:    vmov.u16 r3, q0[4]
1151; CHECK-NEXT:    vcmp.i32 ne, q4, zr
1152; CHECK-NEXT:    vmov q4[2], q4[0], r3, r2
1153; CHECK-NEXT:    vand q4, q4, q1
1154; CHECK-NEXT:    vpsel q4, q4, q2
1155; CHECK-NEXT:    vmov r2, r3, d8
1156; CHECK-NEXT:    adds r0, r0, r2
1157; CHECK-NEXT:    adcs r1, r3
1158; CHECK-NEXT:    vmov r2, r3, d9
1159; CHECK-NEXT:    adds r0, r0, r2
1160; CHECK-NEXT:    adcs r1, r3
1161; CHECK-NEXT:    vmov r2, r3, d7
1162; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
1163; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
1164; CHECK-NEXT:    vmov.u16 r2, q0[7]
1165; CHECK-NEXT:    vmov.u16 r3, q0[6]
1166; CHECK-NEXT:    vcmp.i32 ne, q3, zr
1167; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1168; CHECK-NEXT:    vand q0, q0, q1
1169; CHECK-NEXT:    vpsel q0, q0, q2
1170; CHECK-NEXT:    vmov r2, r3, d0
1171; CHECK-NEXT:    adds r0, r0, r2
1172; CHECK-NEXT:    adcs r1, r3
1173; CHECK-NEXT:    vmov r2, r3, d1
1174; CHECK-NEXT:    adds r0, r0, r2
1175; CHECK-NEXT:    adcs r1, r3
1176; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1177; CHECK-NEXT:    bx lr
1178entry:
1179  %c = icmp eq <8 x i8> %b, zeroinitializer
1180  %xx = zext <8 x i8> %x to <8 x i64>
1181  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1182  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
1183  ret i64 %z
1184}
1185
; Predicated add reduction, <8 x i8> sign-extended to <8 x i64>.
; Lanes of %x whose matching lane of %b is zero are sign-extended to i64 and
; summed; every other lane contributes 0.
; The expected code widens %b with vmovlb.u8 for the compare, then for each
; pair of lanes extracts the 16-bit elements of %x, sign-extends them in
; general-purpose registers (sxtb for the low word, asrs #31 for the high
; word), selects against zero with vpsel and accumulates into r0:r1 with
; adds/adcs.
1186define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %b) {
1187; CHECK-LABEL: add_v8i8_v8i64_sext:
1188; CHECK:       @ %bb.0: @ %entry
1189; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
1190; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
1191; CHECK-NEXT:    vmovlb.u8 q1, q1
1192; CHECK-NEXT:    vmov.i8 q2, #0x0
1193; CHECK-NEXT:    vcmp.i16 eq, q1, zr
1194; CHECK-NEXT:    vmov.i8 q3, #0xff
1195; CHECK-NEXT:    vpsel q4, q3, q2
1196; CHECK-NEXT:    vmov.u16 r0, q4[2]
1197; CHECK-NEXT:    vmov.u16 r1, q4[0]
1198; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
1199; CHECK-NEXT:    vmov.u16 r0, q4[3]
1200; CHECK-NEXT:    vmov.u16 r1, q4[1]
1201; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
1202; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1203; CHECK-NEXT:    vpsel q5, q3, q2
1204; CHECK-NEXT:    vmov r0, r1, d10
1205; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
1206; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
1207; CHECK-NEXT:    vmov.u16 r0, q0[1]
1208; CHECK-NEXT:    vmov.u16 r1, q0[0]
1209; CHECK-NEXT:    sxtb r0, r0
1210; CHECK-NEXT:    sxtb r1, r1
1211; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1212; CHECK-NEXT:    vmov q6[2], q6[0], r1, r0
1213; CHECK-NEXT:    asrs r0, r0, #31
1214; CHECK-NEXT:    asrs r1, r1, #31
1215; CHECK-NEXT:    vmov.i32 q1, #0x0
1216; CHECK-NEXT:    vmov q6[3], q6[1], r1, r0
1217; CHECK-NEXT:    vpsel q6, q6, q1
1218; CHECK-NEXT:    vmov r0, r1, d13
1219; CHECK-NEXT:    vmov r2, r3, d12
1220; CHECK-NEXT:    adds r0, r0, r2
1221; CHECK-NEXT:    adcs r1, r3
1222; CHECK-NEXT:    vmov r2, r3, d11
1223; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
1224; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
1225; CHECK-NEXT:    vmov.u16 r2, q0[3]
1226; CHECK-NEXT:    vmov.u16 r3, q0[2]
1227; CHECK-NEXT:    sxtb r2, r2
1228; CHECK-NEXT:    sxtb r3, r3
1229; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1230; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
1231; CHECK-NEXT:    asrs r2, r2, #31
1232; CHECK-NEXT:    asrs r3, r3, #31
1233; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1234; CHECK-NEXT:    vpsel q5, q5, q1
1235; CHECK-NEXT:    vmov r2, r3, d10
1236; CHECK-NEXT:    adds r0, r0, r2
1237; CHECK-NEXT:    adcs r1, r3
1238; CHECK-NEXT:    vmov r2, r3, d11
1239; CHECK-NEXT:    adds r0, r0, r2
1240; CHECK-NEXT:    vmov.u16 r2, q4[6]
1241; CHECK-NEXT:    adcs r1, r3
1242; CHECK-NEXT:    vmov.u16 r3, q4[4]
1243; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
1244; CHECK-NEXT:    vmov.u16 r2, q4[7]
1245; CHECK-NEXT:    vmov.u16 r3, q4[5]
1246; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1247; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1248; CHECK-NEXT:    vpsel q2, q3, q2
1249; CHECK-NEXT:    vmov r2, r3, d4
1250; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
1251; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
1252; CHECK-NEXT:    vmov.u16 r2, q0[5]
1253; CHECK-NEXT:    vmov.u16 r3, q0[4]
1254; CHECK-NEXT:    sxtb r2, r2
1255; CHECK-NEXT:    sxtb r3, r3
1256; CHECK-NEXT:    vcmp.i32 ne, q3, zr
1257; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
1258; CHECK-NEXT:    asrs r2, r2, #31
1259; CHECK-NEXT:    asrs r3, r3, #31
1260; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
1261; CHECK-NEXT:    vpsel q3, q3, q1
1262; CHECK-NEXT:    vmov r2, r3, d6
1263; CHECK-NEXT:    adds r0, r0, r2
1264; CHECK-NEXT:    adcs r1, r3
1265; CHECK-NEXT:    vmov r2, r3, d7
1266; CHECK-NEXT:    adds r0, r0, r2
1267; CHECK-NEXT:    adcs r1, r3
1268; CHECK-NEXT:    vmov r2, r3, d5
1269; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
1270; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
1271; CHECK-NEXT:    vmov.u16 r2, q0[7]
1272; CHECK-NEXT:    vmov.u16 r3, q0[6]
1273; CHECK-NEXT:    sxtb r2, r2
1274; CHECK-NEXT:    sxtb r3, r3
1275; CHECK-NEXT:    vcmp.i32 ne, q2, zr
1276; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1277; CHECK-NEXT:    asrs r2, r2, #31
1278; CHECK-NEXT:    asrs r3, r3, #31
1279; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
1280; CHECK-NEXT:    vpsel q0, q0, q1
1281; CHECK-NEXT:    vmov r2, r3, d0
1282; CHECK-NEXT:    adds r0, r0, r2
1283; CHECK-NEXT:    adcs r1, r3
1284; CHECK-NEXT:    vmov r2, r3, d1
1285; CHECK-NEXT:    adds r0, r0, r2
1286; CHECK-NEXT:    adcs r1, r3
1287; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
1288; CHECK-NEXT:    bx lr
1289entry:
1290  %c = icmp eq <8 x i8> %b, zeroinitializer
1291  %xx = sext <8 x i8> %x to <8 x i64>
1292  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1293  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
1294  ret i64 %z
1295}
1296
; Predicated add reduction, <4 x i8> zero-extended to <4 x i64>.
; Lanes of %x whose matching lane of %b is zero are zero-extended and summed.
; Expected to lower to a single predicated vaddlvt.u32 after both operands
; are masked to their low byte with 0xff.
1297define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %b) {
1298; CHECK-LABEL: add_v4i8_v4i64_zext:
1299; CHECK:       @ %bb.0: @ %entry
1300; CHECK-NEXT:    vmov.i32 q2, #0xff
1301; CHECK-NEXT:    vand q1, q1, q2
1302; CHECK-NEXT:    vand q0, q0, q2
1303; CHECK-NEXT:    vpt.i32 eq, q1, zr
1304; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
1305; CHECK-NEXT:    bx lr
1306entry:
1307  %c = icmp eq <4 x i8> %b, zeroinitializer
1308  %xx = zext <4 x i8> %x to <4 x i64>
1309  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1310  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
1311  ret i64 %z
1312}
1313
; Predicated add reduction, <4 x i8> sign-extended to <4 x i64>.
; Lanes of %x whose matching lane of %b is zero are sign-extended and summed.
; Expected to lower to a single predicated vaddlvt.s32: %x is sign-extended
; in two steps (vmovlb.s8 then vmovlb.s16) while %b is masked with 0xff for
; the compare against zero.
1314define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %b) {
1315; CHECK-LABEL: add_v4i8_v4i64_sext:
1316; CHECK:       @ %bb.0: @ %entry
1317; CHECK-NEXT:    vmov.i32 q2, #0xff
1318; CHECK-NEXT:    vmovlb.s8 q0, q0
1319; CHECK-NEXT:    vand q1, q1, q2
1320; CHECK-NEXT:    vmovlb.s16 q0, q0
1321; CHECK-NEXT:    vpt.i32 eq, q1, zr
1322; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
1323; CHECK-NEXT:    bx lr
1324entry:
1325  %c = icmp eq <4 x i8> %b, zeroinitializer
1326  %xx = sext <4 x i8> %x to <4 x i64>
1327  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1328  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
1329  ret i64 %z
1330}
1331
; Predicated add reduction, <2 x i8> zero-extended to <2 x i64>.
; Lanes of %x whose matching lane of %b is zero are zero-extended and summed.
; The expected code masks both operands with 0xff, builds the two-lane
; predicate in r1 (cmp/csetm/bfi per lane) and moves it to p0 with vmsr,
; selects against zero with vpsel, then combines the two lanes with add/orrs.
1332define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
1333; CHECK-LABEL: add_v2i8_v2i64_zext:
1334; CHECK:       @ %bb.0: @ %entry
1335; CHECK-NEXT:    vmov.i64 q2, #0xff
1336; CHECK-NEXT:    movs r1, #0
1337; CHECK-NEXT:    vand q1, q1, q2
1338; CHECK-NEXT:    vand q0, q0, q2
1339; CHECK-NEXT:    vmov r0, s4
1340; CHECK-NEXT:    cmp r0, #0
1341; CHECK-NEXT:    csetm r0, eq
1342; CHECK-NEXT:    bfi r1, r0, #0, #8
1343; CHECK-NEXT:    vmov r0, s6
1344; CHECK-NEXT:    vmov.i32 q1, #0x0
1345; CHECK-NEXT:    cmp r0, #0
1346; CHECK-NEXT:    csetm r0, eq
1347; CHECK-NEXT:    bfi r1, r0, #8, #8
1348; CHECK-NEXT:    vmsr p0, r1
1349; CHECK-NEXT:    vpsel q0, q0, q1
1350; CHECK-NEXT:    vmov r0, r1, d1
1351; CHECK-NEXT:    vmov r2, r3, d0
1352; CHECK-NEXT:    add r0, r2
1353; CHECK-NEXT:    orrs r1, r3
1354; CHECK-NEXT:    bx lr
1355entry:
1356  %c = icmp eq <2 x i8> %b, zeroinitializer
1357  %xx = zext <2 x i8> %x to <2 x i64>
1358  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1359  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1360  ret i64 %z
1361}
1362
; Predicated add reduction, <2 x i8> sign-extended to <2 x i64>.
; Lanes of %x whose matching lane of %b is zero are sign-extended and summed.
; The expected code masks %b with 0xff, builds the two-lane predicate in r1
; (cmp/csetm/bfi per lane) and moves it to p0 with vmsr, sign-extends the two
; lanes of %x in general-purpose registers (sxtb, then asrs #31 for the high
; word), selects against zero with vpsel and sums the lanes with adds/adcs.
1363define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %b) {
1364; CHECK-LABEL: add_v2i8_v2i64_sext:
1365; CHECK:       @ %bb.0: @ %entry
1366; CHECK-NEXT:    vmov.i32 q2, #0xff
1367; CHECK-NEXT:    movs r1, #0
1368; CHECK-NEXT:    vand q1, q1, q2
1369; CHECK-NEXT:    vmov r0, s4
1370; CHECK-NEXT:    cmp r0, #0
1371; CHECK-NEXT:    csetm r0, eq
1372; CHECK-NEXT:    bfi r1, r0, #0, #8
1373; CHECK-NEXT:    vmov r0, s6
1374; CHECK-NEXT:    vmov.i32 q1, #0x0
1375; CHECK-NEXT:    cmp r0, #0
1376; CHECK-NEXT:    csetm r0, eq
1377; CHECK-NEXT:    bfi r1, r0, #8, #8
1378; CHECK-NEXT:    vmov r0, s2
1379; CHECK-NEXT:    vmsr p0, r1
1380; CHECK-NEXT:    vmov r1, s0
1381; CHECK-NEXT:    sxtb r0, r0
1382; CHECK-NEXT:    sxtb r1, r1
1383; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
1384; CHECK-NEXT:    asrs r0, r0, #31
1385; CHECK-NEXT:    asrs r1, r1, #31
1386; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
1387; CHECK-NEXT:    vpsel q0, q0, q1
1388; CHECK-NEXT:    vmov r0, r1, d1
1389; CHECK-NEXT:    vmov r2, r3, d0
1390; CHECK-NEXT:    adds r0, r0, r2
1391; CHECK-NEXT:    adcs r1, r3
1392; CHECK-NEXT:    bx lr
1393entry:
1394  %c = icmp eq <2 x i8> %b, zeroinitializer
1395  %xx = sext <2 x i8> %x to <2 x i64>
1396  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1397  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1398  ret i64 %z
1399}
1400
; Predicated add reduction on <2 x i64> with no extension.
; Lanes of %x whose matching lane of %b is zero are summed; others add 0.
; The expected code tests each 64-bit lane of %b by OR-ing its two halves
; (orrs), packs the results into a predicate in r1 (csetm/bfi), moves it to p0
; with vmsr, selects against zero with vpsel and sums the lanes with
; adds/adcs.
1401define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %b) {
1402; CHECK-LABEL: add_v2i64_v2i64:
1403; CHECK:       @ %bb.0: @ %entry
1404; CHECK-NEXT:    vmov r0, r1, d2
1405; CHECK-NEXT:    orrs r0, r1
1406; CHECK-NEXT:    mov.w r1, #0
1407; CHECK-NEXT:    csetm r0, eq
1408; CHECK-NEXT:    bfi r1, r0, #0, #8
1409; CHECK-NEXT:    vmov r0, r2, d3
1410; CHECK-NEXT:    vmov.i32 q1, #0x0
1411; CHECK-NEXT:    orrs r0, r2
1412; CHECK-NEXT:    csetm r0, eq
1413; CHECK-NEXT:    bfi r1, r0, #8, #8
1414; CHECK-NEXT:    vmsr p0, r1
1415; CHECK-NEXT:    vpsel q0, q0, q1
1416; CHECK-NEXT:    vmov r0, r1, d1
1417; CHECK-NEXT:    vmov r2, r3, d0
1418; CHECK-NEXT:    adds r0, r0, r2
1419; CHECK-NEXT:    adcs r1, r3
1420; CHECK-NEXT:    bx lr
1421entry:
1422  %c = icmp eq <2 x i64> %b, zeroinitializer
1423  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
1424  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1425  ret i64 %z
1426}
1427
; Predicated add reduction on <4 x i32>, with the scalar %a added to the
; result. Lanes of %x whose matching lane of %b is zero are summed.
; Expected to lower to a vpt block containing a single vaddvat.u32 on r0.
1428define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %b, i32 %a) {
1429; CHECK-LABEL: add_v4i32_v4i32_acc:
1430; CHECK:       @ %bb.0: @ %entry
1431; CHECK-NEXT:    vpt.i32 eq, q1, zr
1432; CHECK-NEXT:    vaddvat.u32 r0, q0
1433; CHECK-NEXT:    bx lr
1434entry:
1435  %c = icmp eq <4 x i32> %b, zeroinitializer
1436  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1437  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
1438  %r = add i32 %z, %a
1439  ret i32 %r
1440}
1441
; Predicated add reduction, <4 x i32> zero-extended to <4 x i64>, with the
; i64 scalar %a added to the result.
; Expected to lower to a vpt block containing a single vaddlvat.u32 on r0, r1.
1442define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
1443; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
1444; CHECK:       @ %bb.0: @ %entry
1445; CHECK-NEXT:    vpt.i32 eq, q1, zr
1446; CHECK-NEXT:    vaddlvat.u32 r0, r1, q0
1447; CHECK-NEXT:    bx lr
1448entry:
1449  %c = icmp eq <4 x i32> %b, zeroinitializer
1450  %xx = zext <4 x i32> %x to <4 x i64>
1451  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1452  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
1453  %r = add i64 %z, %a
1454  ret i64 %r
1455}
1456
; Predicated add reduction, <4 x i32> sign-extended to <4 x i64>, with the
; i64 scalar %a added to the result.
; Expected to lower to a vpt block containing a single vaddlvat.s32 on r0, r1.
1457define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
1458; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
1459; CHECK:       @ %bb.0: @ %entry
1460; CHECK-NEXT:    vpt.i32 eq, q1, zr
1461; CHECK-NEXT:    vaddlvat.s32 r0, r1, q0
1462; CHECK-NEXT:    bx lr
1463entry:
1464  %c = icmp eq <4 x i32> %b, zeroinitializer
1465  %xx = sext <4 x i32> %x to <4 x i64>
1466  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
1467  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
1468  %r = add i64 %z, %a
1469  ret i64 %r
1470}
1471
; Predicated add reduction, <2 x i32> zero-extended to <2 x i64>, with the
; i64 scalar %a added to the result.
; The expected code uses no vaddlv: %x is masked with 0xffffffff per 64-bit
; lane, the predicate is built in r3 (cmp/csetm/bfi per lane) and moved to p0
; with vmsr, and the selected lanes are summed in general-purpose registers
; before being added to r0:r1. lr and r12 are used as scratch, so r7/lr are
; pushed on entry and the function returns through pop {r7, pc}.
1472define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
1473; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
1474; CHECK:       @ %bb.0: @ %entry
1475; CHECK-NEXT:    .save {r7, lr}
1476; CHECK-NEXT:    push {r7, lr}
1477; CHECK-NEXT:    vmov r2, s4
1478; CHECK-NEXT:    movs r3, #0
1479; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
1480; CHECK-NEXT:    vand q0, q0, q2
1481; CHECK-NEXT:    cmp r2, #0
1482; CHECK-NEXT:    csetm r2, eq
1483; CHECK-NEXT:    bfi r3, r2, #0, #8
1484; CHECK-NEXT:    vmov r2, s6
1485; CHECK-NEXT:    vmov.i32 q1, #0x0
1486; CHECK-NEXT:    cmp r2, #0
1487; CHECK-NEXT:    csetm r2, eq
1488; CHECK-NEXT:    bfi r3, r2, #8, #8
1489; CHECK-NEXT:    vmsr p0, r3
1490; CHECK-NEXT:    vpsel q0, q0, q1
1491; CHECK-NEXT:    vmov lr, r12, d1
1492; CHECK-NEXT:    vmov r3, r2, d0
1493; CHECK-NEXT:    adds.w r3, r3, lr
1494; CHECK-NEXT:    adc.w r2, r2, r12
1495; CHECK-NEXT:    adds r0, r0, r3
1496; CHECK-NEXT:    adcs r1, r2
1497; CHECK-NEXT:    pop {r7, pc}
1498entry:
1499  %c = icmp eq <2 x i32> %b, zeroinitializer
1500  %xx = zext <2 x i32> %x to <2 x i64>
1501  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1502  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1503  %r = add i64 %z, %a
1504  ret i64 %r
1505}
1506
; Predicated add reduction, <2 x i32> sign-extended to <2 x i64>, with the
; i64 scalar %a added to the result.
; The expected code uses no vaddlv: the two lanes of %x are sign-extended in
; general-purpose registers (asrs #31 supplies the high words), the predicate
; is built in r3 (cmp/csetm/bfi per lane) and moved to p0 with vmsr, and the
; selected lanes are summed before being added to r0:r1. lr and r12 are used
; as scratch, so r7/lr are pushed on entry and the function returns through
; pop {r7, pc}.
1507define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
1508; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
1509; CHECK:       @ %bb.0: @ %entry
1510; CHECK-NEXT:    .save {r7, lr}
1511; CHECK-NEXT:    push {r7, lr}
1512; CHECK-NEXT:    vmov r2, s2
1513; CHECK-NEXT:    vmov r3, s0
1514; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1515; CHECK-NEXT:    asrs r2, r2, #31
1516; CHECK-NEXT:    asrs r3, r3, #31
1517; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
1518; CHECK-NEXT:    vmov r2, s4
1519; CHECK-NEXT:    movs r3, #0
1520; CHECK-NEXT:    cmp r2, #0
1521; CHECK-NEXT:    csetm r2, eq
1522; CHECK-NEXT:    bfi r3, r2, #0, #8
1523; CHECK-NEXT:    vmov r2, s6
1524; CHECK-NEXT:    vmov.i32 q1, #0x0
1525; CHECK-NEXT:    cmp r2, #0
1526; CHECK-NEXT:    csetm r2, eq
1527; CHECK-NEXT:    bfi r3, r2, #8, #8
1528; CHECK-NEXT:    vmsr p0, r3
1529; CHECK-NEXT:    vpsel q0, q0, q1
1530; CHECK-NEXT:    vmov lr, r12, d1
1531; CHECK-NEXT:    vmov r3, r2, d0
1532; CHECK-NEXT:    adds.w r3, r3, lr
1533; CHECK-NEXT:    adc.w r2, r2, r12
1534; CHECK-NEXT:    adds r0, r0, r3
1535; CHECK-NEXT:    adcs r1, r2
1536; CHECK-NEXT:    pop {r7, pc}
1537entry:
1538  %c = icmp eq <2 x i32> %b, zeroinitializer
1539  %xx = sext <2 x i32> %x to <2 x i64>
1540  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1541  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1542  %r = add i64 %z, %a
1543  ret i64 %r
1544}
1545
; Predicated add reduction, <8 x i16> zero-extended to <8 x i32>, with the
; i32 scalar %a added to the result.
; Expected to lower to a vpt block containing a single vaddvat.u16 on r0, with
; no separate extend instruction.
1546define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
1547; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
1548; CHECK:       @ %bb.0: @ %entry
1549; CHECK-NEXT:    vpt.i16 eq, q1, zr
1550; CHECK-NEXT:    vaddvat.u16 r0, q0
1551; CHECK-NEXT:    bx lr
1552entry:
1553  %c = icmp eq <8 x i16> %b, zeroinitializer
1554  %xx = zext <8 x i16> %x to <8 x i32>
1555  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
1556  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
1557  %r = add i32 %z, %a
1558  ret i32 %r
1559}
1560
; Predicated add reduction, <8 x i16> sign-extended to <8 x i32>, with the
; i32 scalar %a added to the result.
; Expected to lower to a vpt block containing a single vaddvat.s16 on r0, with
; no separate extend instruction.
1561define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
1562; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
1563; CHECK:       @ %bb.0: @ %entry
1564; CHECK-NEXT:    vpt.i16 eq, q1, zr
1565; CHECK-NEXT:    vaddvat.s16 r0, q0
1566; CHECK-NEXT:    bx lr
1567entry:
1568  %c = icmp eq <8 x i16> %b, zeroinitializer
1569  %xx = sext <8 x i16> %x to <8 x i32>
1570  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
1571  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
1572  %r = add i32 %z, %a
1573  ret i32 %r
1574}
1575
; Predicated add reduction, <4 x i16> zero-extended to <4 x i32>, with the
; i32 scalar %a added to the result.
; The expected code widens both %b and %x with vmovlb.u16, then uses a vpt
; block containing a single vaddvat.u32 on r0.
1576define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
1577; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
1578; CHECK:       @ %bb.0: @ %entry
1579; CHECK-NEXT:    vmovlb.u16 q1, q1
1580; CHECK-NEXT:    vmovlb.u16 q0, q0
1581; CHECK-NEXT:    vpt.i32 eq, q1, zr
1582; CHECK-NEXT:    vaddvat.u32 r0, q0
1583; CHECK-NEXT:    bx lr
1584entry:
1585  %c = icmp eq <4 x i16> %b, zeroinitializer
1586  %xx = zext <4 x i16> %x to <4 x i32>
1587  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1588  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
1589  %r = add i32 %z, %a
1590  ret i32 %r
1591}
1592
; Predicated add reduction, <4 x i16> sign-extended to <4 x i32>, with the
; i32 scalar %a added to the result.
; The expected code widens %b with vmovlb.u16 for the compare and %x with
; vmovlb.s16 for the sign extension; the reduction itself is then a
; vaddvat.u32 on the already-extended 32-bit lanes.
1593define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
1594; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
1595; CHECK:       @ %bb.0: @ %entry
1596; CHECK-NEXT:    vmovlb.u16 q1, q1
1597; CHECK-NEXT:    vmovlb.s16 q0, q0
1598; CHECK-NEXT:    vpt.i32 eq, q1, zr
1599; CHECK-NEXT:    vaddvat.u32 r0, q0
1600; CHECK-NEXT:    bx lr
1601entry:
1602  %c = icmp eq <4 x i16> %b, zeroinitializer
1603  %xx = sext <4 x i16> %x to <4 x i32>
1604  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1605  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
1606  %r = add i32 %z, %a
1607  ret i32 %r
1608}
1609
; Predicated add reduction on <8 x i16> with an i16 result, with the i16
; scalar %a added to the result.
; Expected to lower to a vpt block containing a single vaddvat.u16 on r0,
; followed by uxth because the return value is declared zeroext i16.
1610define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> %b, i16 %a) {
1611; CHECK-LABEL: add_v8i16_v8i16_acc:
1612; CHECK:       @ %bb.0: @ %entry
1613; CHECK-NEXT:    vpt.i16 eq, q1, zr
1614; CHECK-NEXT:    vaddvat.u16 r0, q0
1615; CHECK-NEXT:    uxth r0, r0
1616; CHECK-NEXT:    bx lr
1617entry:
1618  %c = icmp eq <8 x i16> %b, zeroinitializer
1619  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1620  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
1621  %r = add i16 %z, %a
1622  ret i16 %r
1623}
1624
; Predicated add-reduction with accumulator: lanes of %x whose matching %b lane
; is zero are zero-extended from i16 to i64, summed, and the sum is added to %a.
; The checks below expect a lane-pair expansion (masks rebuilt with vpsel, vand
; with 0xffff, 64-bit adds/adc into r12:lr) with d8-d15 saved and restored,
; rather than a single reduction instruction.
1625define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %b, i64 %a) {
1626; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
1627; CHECK:       @ %bb.0: @ %entry
1628; CHECK-NEXT:    .save {r7, lr}
1629; CHECK-NEXT:    push {r7, lr}
1630; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
1631; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
1632; CHECK-NEXT:    vmov.i8 q3, #0x0
1633; CHECK-NEXT:    vmov.i8 q4, #0xff
1634; CHECK-NEXT:    vcmp.i16 eq, q1, zr
1635; CHECK-NEXT:    vpsel q5, q4, q3
1636; CHECK-NEXT:    vmov.u16 r2, q5[2]
1637; CHECK-NEXT:    vmov.u16 r3, q5[0]
1638; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
1639; CHECK-NEXT:    vmov.u16 r2, q5[3]
1640; CHECK-NEXT:    vmov.u16 r3, q5[1]
1641; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
1642; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1643; CHECK-NEXT:    vpsel q6, q4, q3
1644; CHECK-NEXT:    vmov r2, r3, d12
1645; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
1646; CHECK-NEXT:    vmov q1[3], q1[1], r2, r3
1647; CHECK-NEXT:    vmov.u16 r2, q0[1]
1648; CHECK-NEXT:    vmov.u16 r3, q0[0]
1649; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1650; CHECK-NEXT:    vmov q2[2], q2[0], r3, r2
1651; CHECK-NEXT:    vmov.i64 q1, #0xffff
1652; CHECK-NEXT:    vand q7, q2, q1
1653; CHECK-NEXT:    vmov.i32 q2, #0x0
1654; CHECK-NEXT:    vpsel q7, q7, q2
1655; CHECK-NEXT:    vmov r12, lr, d15
1656; CHECK-NEXT:    vmov r2, r3, d14
1657; CHECK-NEXT:    orr.w lr, lr, r3
1658; CHECK-NEXT:    add r12, r2
1659; CHECK-NEXT:    vmov r3, r2, d13
1660; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1661; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
1662; CHECK-NEXT:    vmov.u16 r2, q0[3]
1663; CHECK-NEXT:    vmov.u16 r3, q0[2]
1664; CHECK-NEXT:    vcmp.i32 ne, q6, zr
1665; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1666; CHECK-NEXT:    vand q6, q6, q1
1667; CHECK-NEXT:    vpsel q6, q6, q2
1668; CHECK-NEXT:    vmov r2, r3, d12
1669; CHECK-NEXT:    adds.w r12, r12, r2
1670; CHECK-NEXT:    adc.w lr, lr, r3
1671; CHECK-NEXT:    vmov r2, r3, d13
1672; CHECK-NEXT:    adds.w r12, r12, r2
1673; CHECK-NEXT:    vmov.u16 r2, q5[6]
1674; CHECK-NEXT:    adc.w lr, lr, r3
1675; CHECK-NEXT:    vmov.u16 r3, q5[4]
1676; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1677; CHECK-NEXT:    vmov.u16 r2, q5[7]
1678; CHECK-NEXT:    vmov.u16 r3, q5[5]
1679; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
1680; CHECK-NEXT:    vcmp.i32 ne, q6, zr
1681; CHECK-NEXT:    vpsel q3, q4, q3
1682; CHECK-NEXT:    vmov r2, r3, d6
1683; CHECK-NEXT:    vmov q4[2], q4[0], r2, r3
1684; CHECK-NEXT:    vmov q4[3], q4[1], r2, r3
1685; CHECK-NEXT:    vmov.u16 r2, q0[5]
1686; CHECK-NEXT:    vmov.u16 r3, q0[4]
1687; CHECK-NEXT:    vcmp.i32 ne, q4, zr
1688; CHECK-NEXT:    vmov q4[2], q4[0], r3, r2
1689; CHECK-NEXT:    vand q4, q4, q1
1690; CHECK-NEXT:    vpsel q4, q4, q2
1691; CHECK-NEXT:    vmov r2, r3, d8
1692; CHECK-NEXT:    adds.w r12, r12, r2
1693; CHECK-NEXT:    adc.w lr, lr, r3
1694; CHECK-NEXT:    vmov r2, r3, d9
1695; CHECK-NEXT:    adds.w r12, r12, r2
1696; CHECK-NEXT:    adc.w lr, lr, r3
1697; CHECK-NEXT:    vmov r2, r3, d7
1698; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
1699; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
1700; CHECK-NEXT:    vmov.u16 r2, q0[7]
1701; CHECK-NEXT:    vmov.u16 r3, q0[6]
1702; CHECK-NEXT:    vcmp.i32 ne, q3, zr
1703; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1704; CHECK-NEXT:    vand q0, q0, q1
1705; CHECK-NEXT:    vpsel q0, q0, q2
1706; CHECK-NEXT:    vmov r2, r3, d0
1707; CHECK-NEXT:    adds.w r12, r12, r2
1708; CHECK-NEXT:    adc.w lr, lr, r3
1709; CHECK-NEXT:    vmov r2, r3, d1
1710; CHECK-NEXT:    adds.w r2, r2, r12
1711; CHECK-NEXT:    adc.w r3, r3, lr
1712; CHECK-NEXT:    adds r0, r0, r2
1713; CHECK-NEXT:    adcs r1, r3
1714; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1715; CHECK-NEXT:    pop {r7, pc}
1716entry:
1717  %c = icmp eq <8 x i16> %b, zeroinitializer
1718  %xx = zext <8 x i16> %x to <8 x i64>
1719  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1720  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
1721  %r = add i64 %z, %a
1722  ret i64 %r
1723}
1724
; Signed counterpart of add_v8i16_v8i64_acc_zext: selected lanes of %x are
; sign-extended from i16 to i64, summed, and the sum is added to %a.
; The checks below expect the lane-pair expansion again, with the high halves
; produced by vmov.s16 + asrs #31 and the running sum kept in lr:r12; only
; d8-d13 are saved and restored.
1725define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b, i64 %a) {
1726; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
1727; CHECK:       @ %bb.0: @ %entry
1728; CHECK-NEXT:    .save {r7, lr}
1729; CHECK-NEXT:    push {r7, lr}
1730; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
1731; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
1732; CHECK-NEXT:    vmov.i8 q2, #0x0
1733; CHECK-NEXT:    vmov.i8 q3, #0xff
1734; CHECK-NEXT:    vcmp.i16 eq, q1, zr
1735; CHECK-NEXT:    vpsel q4, q3, q2
1736; CHECK-NEXT:    vmov.u16 r2, q4[2]
1737; CHECK-NEXT:    vmov.u16 r3, q4[0]
1738; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
1739; CHECK-NEXT:    vmov.u16 r2, q4[3]
1740; CHECK-NEXT:    vmov.u16 r3, q4[1]
1741; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
1742; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1743; CHECK-NEXT:    vpsel q5, q3, q2
1744; CHECK-NEXT:    vmov r2, r3, d10
1745; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
1746; CHECK-NEXT:    vmov q1[3], q1[1], r2, r3
1747; CHECK-NEXT:    vmov.s16 r2, q0[1]
1748; CHECK-NEXT:    vmov.s16 r3, q0[0]
1749; CHECK-NEXT:    vcmp.i32 ne, q1, zr
1750; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
1751; CHECK-NEXT:    asrs r2, r2, #31
1752; CHECK-NEXT:    asrs r3, r3, #31
1753; CHECK-NEXT:    vmov.i32 q1, #0x0
1754; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
1755; CHECK-NEXT:    vpsel q6, q6, q1
1756; CHECK-NEXT:    vmov lr, r12, d13
1757; CHECK-NEXT:    vmov r3, r2, d12
1758; CHECK-NEXT:    adds.w lr, lr, r3
1759; CHECK-NEXT:    adc.w r12, r12, r2
1760; CHECK-NEXT:    vmov r2, r3, d11
1761; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
1762; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
1763; CHECK-NEXT:    vmov.s16 r2, q0[3]
1764; CHECK-NEXT:    vmov.s16 r3, q0[2]
1765; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1766; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
1767; CHECK-NEXT:    asrs r2, r2, #31
1768; CHECK-NEXT:    asrs r3, r3, #31
1769; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1770; CHECK-NEXT:    vpsel q5, q5, q1
1771; CHECK-NEXT:    vmov r2, r3, d10
1772; CHECK-NEXT:    adds.w lr, lr, r2
1773; CHECK-NEXT:    adc.w r12, r12, r3
1774; CHECK-NEXT:    vmov r2, r3, d11
1775; CHECK-NEXT:    adds.w lr, lr, r2
1776; CHECK-NEXT:    vmov.u16 r2, q4[6]
1777; CHECK-NEXT:    adc.w r12, r12, r3
1778; CHECK-NEXT:    vmov.u16 r3, q4[4]
1779; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
1780; CHECK-NEXT:    vmov.u16 r2, q4[7]
1781; CHECK-NEXT:    vmov.u16 r3, q4[5]
1782; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
1783; CHECK-NEXT:    vcmp.i32 ne, q5, zr
1784; CHECK-NEXT:    vpsel q2, q3, q2
1785; CHECK-NEXT:    vmov r2, r3, d4
1786; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
1787; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
1788; CHECK-NEXT:    vmov.s16 r2, q0[5]
1789; CHECK-NEXT:    vmov.s16 r3, q0[4]
1790; CHECK-NEXT:    vcmp.i32 ne, q3, zr
1791; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
1792; CHECK-NEXT:    asrs r2, r2, #31
1793; CHECK-NEXT:    asrs r3, r3, #31
1794; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
1795; CHECK-NEXT:    vpsel q3, q3, q1
1796; CHECK-NEXT:    vmov r2, r3, d6
1797; CHECK-NEXT:    adds.w lr, lr, r2
1798; CHECK-NEXT:    adc.w r12, r12, r3
1799; CHECK-NEXT:    vmov r2, r3, d7
1800; CHECK-NEXT:    adds.w lr, lr, r2
1801; CHECK-NEXT:    adc.w r12, r12, r3
1802; CHECK-NEXT:    vmov r2, r3, d5
1803; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
1804; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
1805; CHECK-NEXT:    vmov.s16 r2, q0[7]
1806; CHECK-NEXT:    vmov.s16 r3, q0[6]
1807; CHECK-NEXT:    vcmp.i32 ne, q2, zr
1808; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1809; CHECK-NEXT:    asrs r2, r2, #31
1810; CHECK-NEXT:    asrs r3, r3, #31
1811; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
1812; CHECK-NEXT:    vpsel q0, q0, q1
1813; CHECK-NEXT:    vmov r2, r3, d0
1814; CHECK-NEXT:    adds.w lr, lr, r2
1815; CHECK-NEXT:    adc.w r12, r12, r3
1816; CHECK-NEXT:    vmov r2, r3, d1
1817; CHECK-NEXT:    adds.w r2, r2, lr
1818; CHECK-NEXT:    adc.w r3, r3, r12
1819; CHECK-NEXT:    adds r0, r0, r2
1820; CHECK-NEXT:    adcs r1, r3
1821; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
1822; CHECK-NEXT:    pop {r7, pc}
1823entry:
1824  %c = icmp eq <8 x i16> %b, zeroinitializer
1825  %xx = sext <8 x i16> %x to <8 x i64>
1826  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
1827  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
1828  %r = add i64 %z, %a
1829  ret i64 %r
1830}
1831
; Two-lane case: lanes of %x whose matching %b lane is zero are zero-extended
; from i16 to i64, summed, and the sum is added to %a.
; The checks below expect both operands to be masked with 0xffff, the predicate
; to be assembled in r3 via cmp/csetm/bfi and written to p0 with vmsr, then a
; vpsel followed by scalar add/orr and a final adds/adcs into r0/r1.
1832define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
1833; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
1834; CHECK:       @ %bb.0: @ %entry
1835; CHECK-NEXT:    .save {r7, lr}
1836; CHECK-NEXT:    push {r7, lr}
1837; CHECK-NEXT:    vmov.i64 q2, #0xffff
1838; CHECK-NEXT:    movs r3, #0
1839; CHECK-NEXT:    vand q1, q1, q2
1840; CHECK-NEXT:    vand q0, q0, q2
1841; CHECK-NEXT:    vmov r2, s4
1842; CHECK-NEXT:    cmp r2, #0
1843; CHECK-NEXT:    csetm r2, eq
1844; CHECK-NEXT:    bfi r3, r2, #0, #8
1845; CHECK-NEXT:    vmov r2, s6
1846; CHECK-NEXT:    vmov.i32 q1, #0x0
1847; CHECK-NEXT:    cmp r2, #0
1848; CHECK-NEXT:    csetm r2, eq
1849; CHECK-NEXT:    bfi r3, r2, #8, #8
1850; CHECK-NEXT:    vmsr p0, r3
1851; CHECK-NEXT:    vpsel q0, q0, q1
1852; CHECK-NEXT:    vmov r12, lr, d1
1853; CHECK-NEXT:    vmov r2, r3, d0
1854; CHECK-NEXT:    add r2, r12
1855; CHECK-NEXT:    orr.w r3, r3, lr
1856; CHECK-NEXT:    adds r0, r0, r2
1857; CHECK-NEXT:    adcs r1, r3
1858; CHECK-NEXT:    pop {r7, pc}
1859entry:
1860  %c = icmp eq <2 x i16> %b, zeroinitializer
1861  %xx = zext <2 x i16> %x to <2 x i64>
1862  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1863  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1864  %r = add i64 %z, %a
1865  ret i64 %r
1866}
1867
; Two-lane signed case: selected lanes of %x are sign-extended from i16 to i64,
; summed, and the sum is added to %a.
; The checks below expect the predicate to be built in r3 (cmp/csetm/bfi, then
; vmsr p0), the data lanes to be extended with sxth + asrs #31 and reinserted,
; then a vpsel and a 64-bit adds/adc chain ending in adds/adcs on r0/r1.
1868define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
1869; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
1870; CHECK:       @ %bb.0: @ %entry
1871; CHECK-NEXT:    .save {r7, lr}
1872; CHECK-NEXT:    push {r7, lr}
1873; CHECK-NEXT:    vmov.i32 q2, #0xffff
1874; CHECK-NEXT:    movs r3, #0
1875; CHECK-NEXT:    vand q1, q1, q2
1876; CHECK-NEXT:    vmov r2, s4
1877; CHECK-NEXT:    cmp r2, #0
1878; CHECK-NEXT:    csetm r2, eq
1879; CHECK-NEXT:    bfi r3, r2, #0, #8
1880; CHECK-NEXT:    vmov r2, s6
1881; CHECK-NEXT:    vmov.i32 q1, #0x0
1882; CHECK-NEXT:    cmp r2, #0
1883; CHECK-NEXT:    csetm r2, eq
1884; CHECK-NEXT:    bfi r3, r2, #8, #8
1885; CHECK-NEXT:    vmov r2, s2
1886; CHECK-NEXT:    vmsr p0, r3
1887; CHECK-NEXT:    vmov r3, s0
1888; CHECK-NEXT:    sxth r2, r2
1889; CHECK-NEXT:    sxth r3, r3
1890; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
1891; CHECK-NEXT:    asrs r2, r2, #31
1892; CHECK-NEXT:    asrs r3, r3, #31
1893; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
1894; CHECK-NEXT:    vpsel q0, q0, q1
1895; CHECK-NEXT:    vmov lr, r12, d1
1896; CHECK-NEXT:    vmov r3, r2, d0
1897; CHECK-NEXT:    adds.w r3, r3, lr
1898; CHECK-NEXT:    adc.w r2, r2, r12
1899; CHECK-NEXT:    adds r0, r0, r3
1900; CHECK-NEXT:    adcs r1, r2
1901; CHECK-NEXT:    pop {r7, pc}
1902entry:
1903  %c = icmp eq <2 x i16> %b, zeroinitializer
1904  %xx = sext <2 x i16> %x to <2 x i64>
1905  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
1906  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
1907  %r = add i64 %z, %a
1908  ret i64 %r
1909}
1910
; Lanes of %x whose matching %b lane is zero are zero-extended from i8 to i32,
; summed, and the sum is added to %a.
; The checks below expect the whole pattern to fold into one predicated
; accumulating reduction: vpt.i8 eq followed by vaddvat.u8 into r0.
1911define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %b, i32 %a) {
1912; CHECK-LABEL: add_v16i8_v16i32_acc_zext:
1913; CHECK:       @ %bb.0: @ %entry
1914; CHECK-NEXT:    vpt.i8 eq, q1, zr
1915; CHECK-NEXT:    vaddvat.u8 r0, q0
1916; CHECK-NEXT:    bx lr
1917entry:
1918  %c = icmp eq <16 x i8> %b, zeroinitializer
1919  %xx = zext <16 x i8> %x to <16 x i32>
1920  %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
1921  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
1922  %r = add i32 %z, %a
1923  ret i32 %r
1924}
1925
; Signed counterpart of add_v16i8_v16i32_acc_zext: selected lanes of %x are
; sign-extended from i8 to i32, summed, and the sum is added to %a.
; The checks below expect vpt.i8 eq followed by the signed form vaddvat.s8.
1926define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %b, i32 %a) {
1927; CHECK-LABEL: add_v16i8_v16i32_acc_sext:
1928; CHECK:       @ %bb.0: @ %entry
1929; CHECK-NEXT:    vpt.i8 eq, q1, zr
1930; CHECK-NEXT:    vaddvat.s8 r0, q0
1931; CHECK-NEXT:    bx lr
1932entry:
1933  %c = icmp eq <16 x i8> %b, zeroinitializer
1934  %xx = sext <16 x i8> %x to <16 x i32>
1935  %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
1936  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
1937  %r = add i32 %z, %a
1938  ret i32 %r
1939}
1940
; Four-lane i8 input: selected lanes of %x are zero-extended to i32, summed,
; and the sum is added to %a.
; The checks below expect both %b and %x to be masked with 0xff (vand) before
; a 32-bit predicated accumulating reduction (vpt.i32 eq + vaddvat.u32).
1941define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %b, i32 %a) {
1942; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
1943; CHECK:       @ %bb.0: @ %entry
1944; CHECK-NEXT:    vmov.i32 q2, #0xff
1945; CHECK-NEXT:    vand q1, q1, q2
1946; CHECK-NEXT:    vand q0, q0, q2
1947; CHECK-NEXT:    vpt.i32 eq, q1, zr
1948; CHECK-NEXT:    vaddvat.u32 r0, q0
1949; CHECK-NEXT:    bx lr
1950entry:
1951  %c = icmp eq <4 x i8> %b, zeroinitializer
1952  %xx = zext <4 x i8> %x to <4 x i32>
1953  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1954  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
1955  %r = add i32 %z, %a
1956  ret i32 %r
1957}
1958
; Four-lane i8 input, signed: selected lanes of %x are sign-extended to i32,
; summed, and the sum is added to %a.
; The checks below expect %x to be widened in-register with vmovlb.s8 then
; vmovlb.s16, %b to be masked with 0xff, and the reduction to be
; vpt.i32 eq + vaddvat.u32.
1959define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %b, i32 %a) {
1960; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
1961; CHECK:       @ %bb.0: @ %entry
1962; CHECK-NEXT:    vmov.i32 q2, #0xff
1963; CHECK-NEXT:    vmovlb.s8 q0, q0
1964; CHECK-NEXT:    vand q1, q1, q2
1965; CHECK-NEXT:    vmovlb.s16 q0, q0
1966; CHECK-NEXT:    vpt.i32 eq, q1, zr
1967; CHECK-NEXT:    vaddvat.u32 r0, q0
1968; CHECK-NEXT:    bx lr
1969entry:
1970  %c = icmp eq <4 x i8> %b, zeroinitializer
1971  %xx = sext <4 x i8> %x to <4 x i32>
1972  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
1973  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
1974  %r = add i32 %z, %a
1975  ret i32 %r
1976}
1977
; Selected lanes of %x are zero-extended from i8 to i16, summed, and the sum is
; added to %a; the i16 result is returned with the zeroext attribute.
; The checks below expect vpt.i8 eq + vaddvat.u8, followed by uxth on r0.
1978define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, <16 x i8> %b, i16 %a) {
1979; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
1980; CHECK:       @ %bb.0: @ %entry
1981; CHECK-NEXT:    vpt.i8 eq, q1, zr
1982; CHECK-NEXT:    vaddvat.u8 r0, q0
1983; CHECK-NEXT:    uxth r0, r0
1984; CHECK-NEXT:    bx lr
1985entry:
1986  %c = icmp eq <16 x i8> %b, zeroinitializer
1987  %xx = zext <16 x i8> %x to <16 x i16>
1988  %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
1989  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
1990  %r = add i16 %z, %a
1991  ret i16 %r
1992}
1993
; Selected lanes of %x are sign-extended from i8 to i16, summed, and the sum is
; added to %a; the i16 result is returned with the signext attribute.
; The checks below expect vpt.i8 eq + vaddvat.s8, followed by sxth on r0.
1994define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, <16 x i8> %b, i16 %a) {
1995; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
1996; CHECK:       @ %bb.0: @ %entry
1997; CHECK-NEXT:    vpt.i8 eq, q1, zr
1998; CHECK-NEXT:    vaddvat.s8 r0, q0
1999; CHECK-NEXT:    sxth r0, r0
2000; CHECK-NEXT:    bx lr
2001entry:
2002  %c = icmp eq <16 x i8> %b, zeroinitializer
2003  %xx = sext <16 x i8> %x to <16 x i16>
2004  %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
2005  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
2006  %r = add i16 %z, %a
2007  ret i16 %r
2008}
2009
; Eight-lane i8 input: selected lanes of %x are zero-extended to i16, summed,
; and the sum is added to %a; the result is returned as zeroext i16.
; The checks below expect both %b and %x to be widened with vmovlb.u8, then
; vpt.i16 eq + vaddvat.u16, followed by uxth on r0.
2010define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %b, i16 %a) {
2011; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
2012; CHECK:       @ %bb.0: @ %entry
2013; CHECK-NEXT:    vmovlb.u8 q1, q1
2014; CHECK-NEXT:    vmovlb.u8 q0, q0
2015; CHECK-NEXT:    vpt.i16 eq, q1, zr
2016; CHECK-NEXT:    vaddvat.u16 r0, q0
2017; CHECK-NEXT:    uxth r0, r0
2018; CHECK-NEXT:    bx lr
2019entry:
2020  %c = icmp eq <8 x i8> %b, zeroinitializer
2021  %xx = zext <8 x i8> %x to <8 x i16>
2022  %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
2023  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
2024  %r = add i16 %z, %a
2025  ret i16 %r
2026}
2027
; Eight-lane i8 input, signed: selected lanes of %x are sign-extended to i16,
; summed, and the sum is added to %a; the result is returned as signext i16.
; The checks below expect %b to be widened with vmovlb.u8 and %x with
; vmovlb.s8, then vpt.i16 eq + vaddvat.u16, followed by sxth on r0.
2028define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %b, i16 %a) {
2029; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
2030; CHECK:       @ %bb.0: @ %entry
2031; CHECK-NEXT:    vmovlb.u8 q1, q1
2032; CHECK-NEXT:    vmovlb.s8 q0, q0
2033; CHECK-NEXT:    vpt.i16 eq, q1, zr
2034; CHECK-NEXT:    vaddvat.u16 r0, q0
2035; CHECK-NEXT:    sxth r0, r0
2036; CHECK-NEXT:    bx lr
2037entry:
2038  %c = icmp eq <8 x i8> %b, zeroinitializer
2039  %xx = sext <8 x i8> %x to <8 x i16>
2040  %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
2041  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
2042  %r = add i16 %z, %a
2043  ret i16 %r
2044}
2045
; No extension: lanes of %x whose matching %b lane is zero are summed as i8 and
; the sum is added to %a; the result is returned as zeroext i8.
; The checks below expect vpt.i8 eq + vaddvat.u8, followed by uxtb on r0.
2046define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> %b, i8 %a) {
2047; CHECK-LABEL: add_v16i8_v16i8_acc:
2048; CHECK:       @ %bb.0: @ %entry
2049; CHECK-NEXT:    vpt.i8 eq, q1, zr
2050; CHECK-NEXT:    vaddvat.u8 r0, q0
2051; CHECK-NEXT:    uxtb r0, r0
2052; CHECK-NEXT:    bx lr
2053entry:
2054  %c = icmp eq <16 x i8> %b, zeroinitializer
2055  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
2056  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
2057  %r = add i8 %z, %a
2058  ret i8 %r
2059}
2060
; Sixteen-lane i8 input widened to i64: lanes of %x whose matching %b lane is
; zero are zero-extended, summed, and the sum is added to %a.
; The checks below expect a full lane-pair expansion: the i8 predicate is
; re-materialised as i16 and then i32 masks via vpsel, each data pair is masked
; with 0xff (vand), selected, and accumulated into r12:lr with adds/adc.
; d8-d15 are saved and a 16-byte stack slot holds the spilled zero vector.
2061define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %b, i64 %a) {
2062; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
2063; CHECK:       @ %bb.0: @ %entry
2064; CHECK-NEXT:    .save {r7, lr}
2065; CHECK-NEXT:    push {r7, lr}
2066; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
2067; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
2068; CHECK-NEXT:    .pad #16
2069; CHECK-NEXT:    sub sp, #16
2070; CHECK-NEXT:    vmov q2, q0
2071; CHECK-NEXT:    vcmp.i8 eq, q1, zr
2072; CHECK-NEXT:    vmov.i8 q0, #0x0
2073; CHECK-NEXT:    vmov.i8 q1, #0xff
2074; CHECK-NEXT:    vpsel q5, q1, q0
2075; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
2076; CHECK-NEXT:    vmov.u8 r2, q5[0]
2077; CHECK-NEXT:    vmov.16 q3[0], r2
2078; CHECK-NEXT:    vmov.u8 r2, q5[1]
2079; CHECK-NEXT:    vmov.16 q3[1], r2
2080; CHECK-NEXT:    vmov.u8 r2, q5[2]
2081; CHECK-NEXT:    vmov.16 q3[2], r2
2082; CHECK-NEXT:    vmov.u8 r2, q5[3]
2083; CHECK-NEXT:    vmov.16 q3[3], r2
2084; CHECK-NEXT:    vmov.u8 r2, q5[4]
2085; CHECK-NEXT:    vmov.16 q3[4], r2
2086; CHECK-NEXT:    vmov.u8 r2, q5[5]
2087; CHECK-NEXT:    vmov.16 q3[5], r2
2088; CHECK-NEXT:    vmov.u8 r2, q5[6]
2089; CHECK-NEXT:    vmov.16 q3[6], r2
2090; CHECK-NEXT:    vmov.u8 r2, q5[7]
2091; CHECK-NEXT:    vmov.16 q3[7], r2
2092; CHECK-NEXT:    vcmp.i16 ne, q3, zr
2093; CHECK-NEXT:    vpsel q6, q1, q0
2094; CHECK-NEXT:    vmov.u16 r2, q6[2]
2095; CHECK-NEXT:    vmov.u16 r3, q6[0]
2096; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
2097; CHECK-NEXT:    vmov.u16 r2, q6[3]
2098; CHECK-NEXT:    vmov.u16 r3, q6[1]
2099; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
2100; CHECK-NEXT:    vcmp.i32 ne, q3, zr
2101; CHECK-NEXT:    vpsel q7, q1, q0
2102; CHECK-NEXT:    vmov r2, r3, d14
2103; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
2104; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
2105; CHECK-NEXT:    vmov.u8 r2, q2[1]
2106; CHECK-NEXT:    vmov.u8 r3, q2[0]
2107; CHECK-NEXT:    vcmp.i32 ne, q3, zr
2108; CHECK-NEXT:    vmov q4[2], q4[0], r3, r2
2109; CHECK-NEXT:    vmov.i64 q3, #0xff
2110; CHECK-NEXT:    vand q0, q4, q3
2111; CHECK-NEXT:    vmov.i32 q4, #0x0
2112; CHECK-NEXT:    vpsel q0, q0, q4
2113; CHECK-NEXT:    vmov r12, lr, d1
2114; CHECK-NEXT:    vmov r2, r3, d0
2115; CHECK-NEXT:    orr.w lr, lr, r3
2116; CHECK-NEXT:    add r12, r2
2117; CHECK-NEXT:    vmov r3, r2, d15
2118; CHECK-NEXT:    vldrw.u32 q7, [sp] @ 16-byte Reload
2119; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2120; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
2121; CHECK-NEXT:    vmov.u8 r2, q2[3]
2122; CHECK-NEXT:    vmov.u8 r3, q2[2]
2123; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2124; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2125; CHECK-NEXT:    vand q0, q0, q3
2126; CHECK-NEXT:    vpsel q0, q0, q4
2127; CHECK-NEXT:    vmov r2, r3, d0
2128; CHECK-NEXT:    adds.w r12, r12, r2
2129; CHECK-NEXT:    adc.w lr, lr, r3
2130; CHECK-NEXT:    vmov r2, r3, d1
2131; CHECK-NEXT:    adds.w r12, r12, r2
2132; CHECK-NEXT:    vmov.u16 r2, q6[6]
2133; CHECK-NEXT:    adc.w lr, lr, r3
2134; CHECK-NEXT:    vmov.u16 r3, q6[4]
2135; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2136; CHECK-NEXT:    vmov.u16 r2, q6[7]
2137; CHECK-NEXT:    vmov.u16 r3, q6[5]
2138; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
2139; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2140; CHECK-NEXT:    vpsel q6, q1, q7
2141; CHECK-NEXT:    vmov r2, r3, d12
2142; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
2143; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
2144; CHECK-NEXT:    vmov.u8 r2, q2[5]
2145; CHECK-NEXT:    vmov.u8 r3, q2[4]
2146; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2147; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2148; CHECK-NEXT:    vand q0, q0, q3
2149; CHECK-NEXT:    vpsel q0, q0, q4
2150; CHECK-NEXT:    vmov r2, r3, d0
2151; CHECK-NEXT:    adds.w r12, r12, r2
2152; CHECK-NEXT:    adc.w lr, lr, r3
2153; CHECK-NEXT:    vmov r2, r3, d1
2154; CHECK-NEXT:    adds.w r12, r12, r2
2155; CHECK-NEXT:    adc.w lr, lr, r3
2156; CHECK-NEXT:    vmov r2, r3, d13
2157; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
2158; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
2159; CHECK-NEXT:    vmov.u8 r2, q2[7]
2160; CHECK-NEXT:    vmov.u8 r3, q2[6]
2161; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2162; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2163; CHECK-NEXT:    vand q0, q0, q3
2164; CHECK-NEXT:    vpsel q0, q0, q4
2165; CHECK-NEXT:    vmov r2, r3, d0
2166; CHECK-NEXT:    adds.w r12, r12, r2
2167; CHECK-NEXT:    adc.w lr, lr, r3
2168; CHECK-NEXT:    vmov r2, r3, d1
2169; CHECK-NEXT:    adds.w r12, r12, r2
2170; CHECK-NEXT:    vmov.u8 r2, q5[8]
2171; CHECK-NEXT:    vmov.16 q6[0], r2
2172; CHECK-NEXT:    vmov.u8 r2, q5[9]
2173; CHECK-NEXT:    vmov.16 q6[1], r2
2174; CHECK-NEXT:    vmov.u8 r2, q5[10]
2175; CHECK-NEXT:    vmov.16 q6[2], r2
2176; CHECK-NEXT:    vmov.u8 r2, q5[11]
2177; CHECK-NEXT:    vmov.16 q6[3], r2
2178; CHECK-NEXT:    vmov.u8 r2, q5[12]
2179; CHECK-NEXT:    vmov.16 q6[4], r2
2180; CHECK-NEXT:    vmov.u8 r2, q5[13]
2181; CHECK-NEXT:    vmov.16 q6[5], r2
2182; CHECK-NEXT:    vmov.u8 r2, q5[14]
2183; CHECK-NEXT:    vmov.16 q6[6], r2
2184; CHECK-NEXT:    vmov.u8 r2, q5[15]
2185; CHECK-NEXT:    vmov.16 q6[7], r2
2186; CHECK-NEXT:    adc.w lr, lr, r3
2187; CHECK-NEXT:    vcmp.i16 ne, q6, zr
2188; CHECK-NEXT:    vpsel q5, q1, q7
2189; CHECK-NEXT:    vmov.u16 r2, q5[2]
2190; CHECK-NEXT:    vmov.u16 r3, q5[0]
2191; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2192; CHECK-NEXT:    vmov.u16 r2, q5[3]
2193; CHECK-NEXT:    vmov.u16 r3, q5[1]
2194; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
2195; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2196; CHECK-NEXT:    vpsel q6, q1, q7
2197; CHECK-NEXT:    vmov r2, r3, d12
2198; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
2199; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
2200; CHECK-NEXT:    vmov.u8 r2, q2[9]
2201; CHECK-NEXT:    vmov.u8 r3, q2[8]
2202; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2203; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2204; CHECK-NEXT:    vand q0, q0, q3
2205; CHECK-NEXT:    vpsel q0, q0, q4
2206; CHECK-NEXT:    vmov r2, r3, d0
2207; CHECK-NEXT:    adds.w r12, r12, r2
2208; CHECK-NEXT:    adc.w lr, lr, r3
2209; CHECK-NEXT:    vmov r2, r3, d1
2210; CHECK-NEXT:    adds.w r12, r12, r2
2211; CHECK-NEXT:    adc.w lr, lr, r3
2212; CHECK-NEXT:    vmov r2, r3, d13
2213; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
2214; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
2215; CHECK-NEXT:    vmov.u8 r2, q2[11]
2216; CHECK-NEXT:    vmov.u8 r3, q2[10]
2217; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2218; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2219; CHECK-NEXT:    vand q0, q0, q3
2220; CHECK-NEXT:    vpsel q0, q0, q4
2221; CHECK-NEXT:    vmov r2, r3, d0
2222; CHECK-NEXT:    adds.w r12, r12, r2
2223; CHECK-NEXT:    adc.w lr, lr, r3
2224; CHECK-NEXT:    vmov r2, r3, d1
2225; CHECK-NEXT:    adds.w r12, r12, r2
2226; CHECK-NEXT:    vmov.u16 r2, q5[6]
2227; CHECK-NEXT:    adc.w lr, lr, r3
2228; CHECK-NEXT:    vmov.u16 r3, q5[4]
2229; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2230; CHECK-NEXT:    vmov.u16 r2, q5[7]
2231; CHECK-NEXT:    vmov.u16 r3, q5[5]
2232; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
2233; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2234; CHECK-NEXT:    vpsel q1, q1, q7
2235; CHECK-NEXT:    vmov r2, r3, d2
2236; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
2237; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
2238; CHECK-NEXT:    vmov.u8 r2, q2[13]
2239; CHECK-NEXT:    vmov.u8 r3, q2[12]
2240; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2241; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2242; CHECK-NEXT:    vand q0, q0, q3
2243; CHECK-NEXT:    vpsel q0, q0, q4
2244; CHECK-NEXT:    vmov r2, r3, d0
2245; CHECK-NEXT:    adds.w r12, r12, r2
2246; CHECK-NEXT:    adc.w lr, lr, r3
2247; CHECK-NEXT:    vmov r2, r3, d1
2248; CHECK-NEXT:    adds.w r12, r12, r2
2249; CHECK-NEXT:    adc.w lr, lr, r3
2250; CHECK-NEXT:    vmov r2, r3, d3
2251; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
2252; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
2253; CHECK-NEXT:    vmov.u8 r2, q2[15]
2254; CHECK-NEXT:    vmov.u8 r3, q2[14]
2255; CHECK-NEXT:    vcmp.i32 ne, q0, zr
2256; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2257; CHECK-NEXT:    vand q0, q0, q3
2258; CHECK-NEXT:    vpsel q0, q0, q4
2259; CHECK-NEXT:    vmov r2, r3, d0
2260; CHECK-NEXT:    adds.w r12, r12, r2
2261; CHECK-NEXT:    adc.w lr, lr, r3
2262; CHECK-NEXT:    vmov r2, r3, d1
2263; CHECK-NEXT:    adds.w r2, r2, r12
2264; CHECK-NEXT:    adc.w r3, r3, lr
2265; CHECK-NEXT:    adds r0, r0, r2
2266; CHECK-NEXT:    adcs r1, r3
2267; CHECK-NEXT:    add sp, #16
2268; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
2269; CHECK-NEXT:    pop {r7, pc}
2270entry:
2271  %c = icmp eq <16 x i8> %b, zeroinitializer
2272  %xx = zext <16 x i8> %x to <16 x i64>
2273  %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
2274  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
2275  %r = add i64 %z, %a
2276  ret i64 %r
2277}
2278
; Signed counterpart of add_v16i8_v16i64_acc_zext: selected lanes of %x are
; sign-extended from i8 to i64, summed, and the sum is added to %a.
; The checks below expect the same lane-pair expansion, with each data pair
; extracted by vmov.s8 and its high half formed by asrs #31, then selected
; with vpsel and accumulated into lr:r12. d8-d15 are saved; unlike the zext
; variant, no extra stack slot is expected.
2279define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b, i64 %a) {
2280; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
2281; CHECK:       @ %bb.0: @ %entry
2282; CHECK-NEXT:    .save {r7, lr}
2283; CHECK-NEXT:    push {r7, lr}
2284; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
2285; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
2286; CHECK-NEXT:    vcmp.i8 eq, q1, zr
2287; CHECK-NEXT:    vmov.i8 q1, #0x0
2288; CHECK-NEXT:    vmov.i8 q2, #0xff
2289; CHECK-NEXT:    vpsel q4, q2, q1
2290; CHECK-NEXT:    vmov.u8 r2, q4[0]
2291; CHECK-NEXT:    vmov.16 q3[0], r2
2292; CHECK-NEXT:    vmov.u8 r2, q4[1]
2293; CHECK-NEXT:    vmov.16 q3[1], r2
2294; CHECK-NEXT:    vmov.u8 r2, q4[2]
2295; CHECK-NEXT:    vmov.16 q3[2], r2
2296; CHECK-NEXT:    vmov.u8 r2, q4[3]
2297; CHECK-NEXT:    vmov.16 q3[3], r2
2298; CHECK-NEXT:    vmov.u8 r2, q4[4]
2299; CHECK-NEXT:    vmov.16 q3[4], r2
2300; CHECK-NEXT:    vmov.u8 r2, q4[5]
2301; CHECK-NEXT:    vmov.16 q3[5], r2
2302; CHECK-NEXT:    vmov.u8 r2, q4[6]
2303; CHECK-NEXT:    vmov.16 q3[6], r2
2304; CHECK-NEXT:    vmov.u8 r2, q4[7]
2305; CHECK-NEXT:    vmov.16 q3[7], r2
2306; CHECK-NEXT:    vcmp.i16 ne, q3, zr
2307; CHECK-NEXT:    vpsel q5, q2, q1
2308; CHECK-NEXT:    vmov.u16 r2, q5[2]
2309; CHECK-NEXT:    vmov.u16 r3, q5[0]
2310; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
2311; CHECK-NEXT:    vmov.u16 r2, q5[3]
2312; CHECK-NEXT:    vmov.u16 r3, q5[1]
2313; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
2314; CHECK-NEXT:    vcmp.i32 ne, q3, zr
2315; CHECK-NEXT:    vpsel q6, q2, q1
2316; CHECK-NEXT:    vmov r2, r3, d12
2317; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
2318; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
2319; CHECK-NEXT:    vmov.s8 r2, q0[1]
2320; CHECK-NEXT:    vmov.s8 r3, q0[0]
2321; CHECK-NEXT:    vcmp.i32 ne, q3, zr
2322; CHECK-NEXT:    vmov q7[2], q7[0], r3, r2
2323; CHECK-NEXT:    asrs r2, r2, #31
2324; CHECK-NEXT:    asrs r3, r3, #31
2325; CHECK-NEXT:    vmov.i32 q3, #0x0
2326; CHECK-NEXT:    vmov q7[3], q7[1], r3, r2
2327; CHECK-NEXT:    vpsel q7, q7, q3
2328; CHECK-NEXT:    vmov lr, r12, d15
2329; CHECK-NEXT:    vmov r3, r2, d14
2330; CHECK-NEXT:    adds.w lr, lr, r3
2331; CHECK-NEXT:    adc.w r12, r12, r2
2332; CHECK-NEXT:    vmov r2, r3, d13
2333; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
2334; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
2335; CHECK-NEXT:    vmov.s8 r2, q0[3]
2336; CHECK-NEXT:    vmov.s8 r3, q0[2]
2337; CHECK-NEXT:    vcmp.i32 ne, q6, zr
2338; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
2339; CHECK-NEXT:    asrs r2, r2, #31
2340; CHECK-NEXT:    asrs r3, r3, #31
2341; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
2342; CHECK-NEXT:    vpsel q6, q6, q3
2343; CHECK-NEXT:    vmov r2, r3, d12
2344; CHECK-NEXT:    adds.w lr, lr, r2
2345; CHECK-NEXT:    adc.w r12, r12, r3
2346; CHECK-NEXT:    vmov r2, r3, d13
2347; CHECK-NEXT:    adds.w lr, lr, r2
2348; CHECK-NEXT:    vmov.u16 r2, q5[6]
2349; CHECK-NEXT:    adc.w r12, r12, r3
2350; CHECK-NEXT:    vmov.u16 r3, q5[4]
2351; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
2352; CHECK-NEXT:    vmov.u16 r2, q5[7]
2353; CHECK-NEXT:    vmov.u16 r3, q5[5]
2354; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
2355; CHECK-NEXT:    vcmp.i32 ne, q6, zr
2356; CHECK-NEXT:    vpsel q5, q2, q1
2357; CHECK-NEXT:    vmov r2, r3, d10
2358; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
2359; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
2360; CHECK-NEXT:    vmov.s8 r2, q0[5]
2361; CHECK-NEXT:    vmov.s8 r3, q0[4]
2362; CHECK-NEXT:    vcmp.i32 ne, q6, zr
2363; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
2364; CHECK-NEXT:    asrs r2, r2, #31
2365; CHECK-NEXT:    asrs r3, r3, #31
2366; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
2367; CHECK-NEXT:    vpsel q6, q6, q3
2368; CHECK-NEXT:    vmov r2, r3, d12
2369; CHECK-NEXT:    adds.w lr, lr, r2
2370; CHECK-NEXT:    adc.w r12, r12, r3
2371; CHECK-NEXT:    vmov r2, r3, d13
2372; CHECK-NEXT:    adds.w lr, lr, r2
2373; CHECK-NEXT:    adc.w r12, r12, r3
2374; CHECK-NEXT:    vmov r2, r3, d11
2375; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
2376; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
2377; CHECK-NEXT:    vmov.s8 r2, q0[7]
2378; CHECK-NEXT:    vmov.s8 r3, q0[6]
2379; CHECK-NEXT:    vcmp.i32 ne, q5, zr
2380; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
2381; CHECK-NEXT:    asrs r2, r2, #31
2382; CHECK-NEXT:    asrs r3, r3, #31
2383; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
2384; CHECK-NEXT:    vpsel q5, q5, q3
2385; CHECK-NEXT:    vmov r2, r3, d10
2386; CHECK-NEXT:    adds.w lr, lr, r2
2387; CHECK-NEXT:    adc.w r12, r12, r3
2388; CHECK-NEXT:    vmov r2, r3, d11
2389; CHECK-NEXT:    adds.w lr, lr, r2
2390; CHECK-NEXT:    vmov.u8 r2, q4[8]
2391; CHECK-NEXT:    vmov.16 q5[0], r2
2392; CHECK-NEXT:    vmov.u8 r2, q4[9]
2393; CHECK-NEXT:    vmov.16 q5[1], r2
2394; CHECK-NEXT:    vmov.u8 r2, q4[10]
2395; CHECK-NEXT:    vmov.16 q5[2], r2
2396; CHECK-NEXT:    vmov.u8 r2, q4[11]
2397; CHECK-NEXT:    vmov.16 q5[3], r2
2398; CHECK-NEXT:    vmov.u8 r2, q4[12]
2399; CHECK-NEXT:    vmov.16 q5[4], r2
2400; CHECK-NEXT:    vmov.u8 r2, q4[13]
2401; CHECK-NEXT:    vmov.16 q5[5], r2
2402; CHECK-NEXT:    vmov.u8 r2, q4[14]
2403; CHECK-NEXT:    vmov.16 q5[6], r2
2404; CHECK-NEXT:    vmov.u8 r2, q4[15]
2405; CHECK-NEXT:    vmov.16 q5[7], r2
2406; CHECK-NEXT:    adc.w r12, r12, r3
2407; CHECK-NEXT:    vcmp.i16 ne, q5, zr
2408; CHECK-NEXT:    vpsel q4, q2, q1
2409; CHECK-NEXT:    vmov.u16 r2, q4[2]
2410; CHECK-NEXT:    vmov.u16 r3, q4[0]
2411; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
2412; CHECK-NEXT:    vmov.u16 r2, q4[3]
2413; CHECK-NEXT:    vmov.u16 r3, q4[1]
2414; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
2415; CHECK-NEXT:    vcmp.i32 ne, q5, zr
2416; CHECK-NEXT:    vpsel q5, q2, q1
2417; CHECK-NEXT:    vmov r2, r3, d10
2418; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
2419; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
2420; CHECK-NEXT:    vmov.s8 r2, q0[9]
2421; CHECK-NEXT:    vmov.s8 r3, q0[8]
2422; CHECK-NEXT:    vcmp.i32 ne, q6, zr
2423; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
2424; CHECK-NEXT:    asrs r2, r2, #31
2425; CHECK-NEXT:    asrs r3, r3, #31
2426; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
2427; CHECK-NEXT:    vpsel q6, q6, q3
2428; CHECK-NEXT:    vmov r2, r3, d12
2429; CHECK-NEXT:    adds.w lr, lr, r2
2430; CHECK-NEXT:    adc.w r12, r12, r3
2431; CHECK-NEXT:    vmov r2, r3, d13
2432; CHECK-NEXT:    adds.w lr, lr, r2
2433; CHECK-NEXT:    adc.w r12, r12, r3
2434; CHECK-NEXT:    vmov r2, r3, d11
2435; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
2436; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
2437; CHECK-NEXT:    vmov.s8 r2, q0[11]
2438; CHECK-NEXT:    vmov.s8 r3, q0[10]
2439; CHECK-NEXT:    vcmp.i32 ne, q5, zr
2440; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
2441; CHECK-NEXT:    asrs r2, r2, #31
2442; CHECK-NEXT:    asrs r3, r3, #31
2443; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
2444; CHECK-NEXT:    vpsel q5, q5, q3
2445; CHECK-NEXT:    vmov r2, r3, d10
2446; CHECK-NEXT:    adds.w lr, lr, r2
2447; CHECK-NEXT:    adc.w r12, r12, r3
2448; CHECK-NEXT:    vmov r2, r3, d11
2449; CHECK-NEXT:    adds.w lr, lr, r2
2450; CHECK-NEXT:    vmov.u16 r2, q4[6]
2451; CHECK-NEXT:    adc.w r12, r12, r3
2452; CHECK-NEXT:    vmov.u16 r3, q4[4]
2453; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
2454; CHECK-NEXT:    vmov.u16 r2, q4[7]
2455; CHECK-NEXT:    vmov.u16 r3, q4[5]
2456; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
2457; CHECK-NEXT:    vcmp.i32 ne, q5, zr
2458; CHECK-NEXT:    vpsel q1, q2, q1
2459; CHECK-NEXT:    vmov r2, r3, d2
2460; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
2461; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
2462; CHECK-NEXT:    vmov.s8 r2, q0[13]
2463; CHECK-NEXT:    vmov.s8 r3, q0[12]
2464; CHECK-NEXT:    vcmp.i32 ne, q2, zr
2465; CHECK-NEXT:    vmov q2[2], q2[0], r3, r2
2466; CHECK-NEXT:    asrs r2, r2, #31
2467; CHECK-NEXT:    asrs r3, r3, #31
2468; CHECK-NEXT:    vmov q2[3], q2[1], r3, r2
2469; CHECK-NEXT:    vpsel q2, q2, q3
2470; CHECK-NEXT:    vmov r2, r3, d4
2471; CHECK-NEXT:    adds.w lr, lr, r2
2472; CHECK-NEXT:    adc.w r12, r12, r3
2473; CHECK-NEXT:    vmov r2, r3, d5
2474; CHECK-NEXT:    adds.w lr, lr, r2
2475; CHECK-NEXT:    adc.w r12, r12, r3
2476; CHECK-NEXT:    vmov r2, r3, d3
2477; CHECK-NEXT:    vmov q1[2], q1[0], r2, r3
2478; CHECK-NEXT:    vmov q1[3], q1[1], r2, r3
2479; CHECK-NEXT:    vmov.s8 r2, q0[15]
2480; CHECK-NEXT:    vmov.s8 r3, q0[14]
2481; CHECK-NEXT:    vcmp.i32 ne, q1, zr
2482; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2483; CHECK-NEXT:    asrs r2, r2, #31
2484; CHECK-NEXT:    asrs r3, r3, #31
2485; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
2486; CHECK-NEXT:    vpsel q0, q0, q3
2487; CHECK-NEXT:    vmov r2, r3, d0
2488; CHECK-NEXT:    adds.w lr, lr, r2
2489; CHECK-NEXT:    adc.w r12, r12, r3
2490; CHECK-NEXT:    vmov r2, r3, d1
2491; CHECK-NEXT:    adds.w r2, r2, lr
2492; CHECK-NEXT:    adc.w r3, r3, r12
2493; CHECK-NEXT:    adds r0, r0, r2
2494; CHECK-NEXT:    adcs r1, r3
2495; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
2496; CHECK-NEXT:    pop {r7, pc}
2497entry:
2498  %c = icmp eq <16 x i8> %b, zeroinitializer
2499  %xx = sext <16 x i8> %x to <16 x i64>
2500  %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
2501  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
2502  %r = add i64 %z, %a
2503  ret i64 %r
2504}
2505
; Predicated add reduction with accumulator: zero-extends the <2 x i8> lanes of
; %x to i64, keeps only the lanes where %b is zero, sums them, and adds %a.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the script rather than editing them by hand.
; The expected code masks both inputs with 0xff, builds the lane predicate in r3
; with cmp/csetm/bfi, moves it to p0 with vmsr, and selects against zero with vpsel.
; NOTE(review): the two lanes are combined with a plain add (low words) and orr
; (high words) instead of adds/adc - presumably valid because zero-extended i8
; lanes have zero high words and their sum cannot carry; confirm before relying on it.
2506define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
2507; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
2508; CHECK:       @ %bb.0: @ %entry
2509; CHECK-NEXT:    .save {r7, lr}
2510; CHECK-NEXT:    push {r7, lr}
2511; CHECK-NEXT:    vmov.i64 q2, #0xff
2512; CHECK-NEXT:    movs r3, #0
2513; CHECK-NEXT:    vand q1, q1, q2
2514; CHECK-NEXT:    vand q0, q0, q2
2515; CHECK-NEXT:    vmov r2, s4
2516; CHECK-NEXT:    cmp r2, #0
2517; CHECK-NEXT:    csetm r2, eq
2518; CHECK-NEXT:    bfi r3, r2, #0, #8
2519; CHECK-NEXT:    vmov r2, s6
2520; CHECK-NEXT:    vmov.i32 q1, #0x0
2521; CHECK-NEXT:    cmp r2, #0
2522; CHECK-NEXT:    csetm r2, eq
2523; CHECK-NEXT:    bfi r3, r2, #8, #8
2524; CHECK-NEXT:    vmsr p0, r3
2525; CHECK-NEXT:    vpsel q0, q0, q1
2526; CHECK-NEXT:    vmov r12, lr, d1
2527; CHECK-NEXT:    vmov r2, r3, d0
2528; CHECK-NEXT:    add r2, r12
2529; CHECK-NEXT:    orr.w r3, r3, lr
2530; CHECK-NEXT:    adds r0, r0, r2
2531; CHECK-NEXT:    adcs r1, r3
2532; CHECK-NEXT:    pop {r7, pc}
2533entry:
  ; Lane mask: true where the corresponding lane of %b equals zero.
2534  %c = icmp eq <2 x i8> %b, zeroinitializer
  ; Widen the data lanes to i64 without sign.
2535  %xx = zext <2 x i8> %x to <2 x i64>
  ; Inactive lanes contribute zero to the sum.
2536  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
2537  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ; Fold the reduction result into the incoming accumulator.
2538  %r = add i64 %z, %a
2539  ret i64 %r
2540}
2541
; Predicated add reduction with accumulator: sign-extends the <2 x i8> lanes of
; %x to i64, keeps only the lanes where %b is zero, sums them, and adds %a.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the script rather than editing them by hand.
; The expected code masks %b with 0xff before the compares, builds the lane
; predicate in r3 with cmp/csetm/bfi and moves it to p0 with vmsr. Each data lane
; is sign-extended in core registers (sxtb for the low word, asrs #31 for the
; high word), reinserted into q0, selected against zero with vpsel, and summed
; with adds/adc pairs.
2542define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
2543; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
2544; CHECK:       @ %bb.0: @ %entry
2545; CHECK-NEXT:    .save {r7, lr}
2546; CHECK-NEXT:    push {r7, lr}
2547; CHECK-NEXT:    vmov.i32 q2, #0xff
2548; CHECK-NEXT:    movs r3, #0
2549; CHECK-NEXT:    vand q1, q1, q2
2550; CHECK-NEXT:    vmov r2, s4
2551; CHECK-NEXT:    cmp r2, #0
2552; CHECK-NEXT:    csetm r2, eq
2553; CHECK-NEXT:    bfi r3, r2, #0, #8
2554; CHECK-NEXT:    vmov r2, s6
2555; CHECK-NEXT:    vmov.i32 q1, #0x0
2556; CHECK-NEXT:    cmp r2, #0
2557; CHECK-NEXT:    csetm r2, eq
2558; CHECK-NEXT:    bfi r3, r2, #8, #8
2559; CHECK-NEXT:    vmov r2, s2
2560; CHECK-NEXT:    vmsr p0, r3
2561; CHECK-NEXT:    vmov r3, s0
2562; CHECK-NEXT:    sxtb r2, r2
2563; CHECK-NEXT:    sxtb r3, r3
2564; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
2565; CHECK-NEXT:    asrs r2, r2, #31
2566; CHECK-NEXT:    asrs r3, r3, #31
2567; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
2568; CHECK-NEXT:    vpsel q0, q0, q1
2569; CHECK-NEXT:    vmov lr, r12, d1
2570; CHECK-NEXT:    vmov r3, r2, d0
2571; CHECK-NEXT:    adds.w r3, r3, lr
2572; CHECK-NEXT:    adc.w r2, r2, r12
2573; CHECK-NEXT:    adds r0, r0, r3
2574; CHECK-NEXT:    adcs r1, r2
2575; CHECK-NEXT:    pop {r7, pc}
2576entry:
  ; Lane mask: true where the corresponding lane of %b equals zero.
2577  %c = icmp eq <2 x i8> %b, zeroinitializer
  ; Widen the data lanes to i64, preserving sign.
2578  %xx = sext <2 x i8> %x to <2 x i64>
  ; Inactive lanes contribute zero to the sum.
2579  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
2580  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ; Fold the reduction result into the incoming accumulator.
2581  %r = add i64 %z, %a
2582  ret i64 %r
2583}
2584
; Predicated add reduction with accumulator on native <2 x i64> lanes (no
; extension): keeps only the lanes of %x where %b is zero, sums them, and adds %a.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the script rather than editing them by hand.
; The expected code tests each 64-bit lane of %b for zero by OR-ing its two
; 32-bit halves (orrs) and materialising the result with csetm. The predicate is
; assembled in r12 with bfi, moved to p0 with vmsr, and used by vpsel to select
; against zero; the two lanes are then summed with adds/adc pairs.
2585define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %b, i64 %a) {
2586; CHECK-LABEL: add_v2i64_v2i64_acc:
2587; CHECK:       @ %bb.0: @ %entry
2588; CHECK-NEXT:    .save {r7, lr}
2589; CHECK-NEXT:    push {r7, lr}
2590; CHECK-NEXT:    vmov r2, r3, d2
2591; CHECK-NEXT:    mov.w r12, #0
2592; CHECK-NEXT:    orrs r2, r3
2593; CHECK-NEXT:    csetm r2, eq
2594; CHECK-NEXT:    bfi r12, r2, #0, #8
2595; CHECK-NEXT:    vmov r2, r3, d3
2596; CHECK-NEXT:    vmov.i32 q1, #0x0
2597; CHECK-NEXT:    orrs r2, r3
2598; CHECK-NEXT:    csetm r2, eq
2599; CHECK-NEXT:    bfi r12, r2, #8, #8
2600; CHECK-NEXT:    vmsr p0, r12
2601; CHECK-NEXT:    vpsel q0, q0, q1
2602; CHECK-NEXT:    vmov lr, r12, d1
2603; CHECK-NEXT:    vmov r3, r2, d0
2604; CHECK-NEXT:    adds.w r3, r3, lr
2605; CHECK-NEXT:    adc.w r2, r2, r12
2606; CHECK-NEXT:    adds r0, r0, r3
2607; CHECK-NEXT:    adcs r1, r2
2608; CHECK-NEXT:    pop {r7, pc}
2609entry:
  ; Lane mask: true where the corresponding lane of %b equals zero.
2610  %c = icmp eq <2 x i64> %b, zeroinitializer
  ; Inactive lanes contribute zero to the sum.
2611  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer
2612  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ; Fold the reduction result into the incoming accumulator.
2613  %r = add i64 %z, %a
2614  ret i64 %r
2615}
2616
; Declarations of the llvm.vector.reduce.add intrinsic overloads, one per
; vector type. Each takes a single vector operand and returns a scalar of the
; vector's element type.
2617declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
2618declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
2619declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
2620declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
2621declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
2622declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
2623declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
2624declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
2625declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
2626declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
2627