xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-pred-selectop.ll (revision f1961153c2017351244289e1b3164bfa9125996f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3
; Returns %x + %y in the lanes where %z is zero and %z in the remaining lanes.
; The expected output folds the select into a VPT block: vpt.i32 followed by a
; predicated vaddt.i32 that writes q0.
4define arm_aapcs_vfpcc <4 x i32> @add_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
5; CHECK-LABEL: add_v4i32:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vpt.i32 eq, q0, zr
8; CHECK-NEXT:    vaddt.i32 q0, q1, q2
9; CHECK-NEXT:    bx lr
10entry:
11  %c = icmp eq <4 x i32> %z, zeroinitializer
12  %a = add <4 x i32> %x, %y
13  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
14  ret <4 x i32> %b
15}
16
; Returns %x + %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vaddt.i16.
17define arm_aapcs_vfpcc <8 x i16> @add_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
18; CHECK-LABEL: add_v8i16:
19; CHECK:       @ %bb.0: @ %entry
20; CHECK-NEXT:    vpt.i16 eq, q0, zr
21; CHECK-NEXT:    vaddt.i16 q0, q1, q2
22; CHECK-NEXT:    bx lr
23entry:
24  %c = icmp eq <8 x i16> %z, zeroinitializer
25  %a = add <8 x i16> %x, %y
26  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
27  ret <8 x i16> %b
28}
29
; Returns %x + %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vaddt.i8.
30define arm_aapcs_vfpcc <16 x i8> @add_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
31; CHECK-LABEL: add_v16i8:
32; CHECK:       @ %bb.0: @ %entry
33; CHECK-NEXT:    vpt.i8 eq, q0, zr
34; CHECK-NEXT:    vaddt.i8 q0, q1, q2
35; CHECK-NEXT:    bx lr
36entry:
37  %c = icmp eq <16 x i8> %z, zeroinitializer
38  %a = add <16 x i8> %x, %y
39  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
40  ret <16 x i8> %b
41}
42
; Returns %x - %y in the lanes where %z is zero and %z in the remaining lanes.
; The expected output is vpt.i32 followed by a predicated vsubt.i32 writing q0.
43define arm_aapcs_vfpcc <4 x i32> @sub_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
44; CHECK-LABEL: sub_v4i32:
45; CHECK:       @ %bb.0: @ %entry
46; CHECK-NEXT:    vpt.i32 eq, q0, zr
47; CHECK-NEXT:    vsubt.i32 q0, q1, q2
48; CHECK-NEXT:    bx lr
49entry:
50  %c = icmp eq <4 x i32> %z, zeroinitializer
51  %a = sub <4 x i32> %x, %y
52  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
53  ret <4 x i32> %b
54}
55
; Returns %x - %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vsubt.i16.
56define arm_aapcs_vfpcc <8 x i16> @sub_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
57; CHECK-LABEL: sub_v8i16:
58; CHECK:       @ %bb.0: @ %entry
59; CHECK-NEXT:    vpt.i16 eq, q0, zr
60; CHECK-NEXT:    vsubt.i16 q0, q1, q2
61; CHECK-NEXT:    bx lr
62entry:
63  %c = icmp eq <8 x i16> %z, zeroinitializer
64  %a = sub <8 x i16> %x, %y
65  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
66  ret <8 x i16> %b
67}
68
; Returns %x - %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vsubt.i8.
69define arm_aapcs_vfpcc <16 x i8> @sub_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
70; CHECK-LABEL: sub_v16i8:
71; CHECK:       @ %bb.0: @ %entry
72; CHECK-NEXT:    vpt.i8 eq, q0, zr
73; CHECK-NEXT:    vsubt.i8 q0, q1, q2
74; CHECK-NEXT:    bx lr
75entry:
76  %c = icmp eq <16 x i8> %z, zeroinitializer
77  %a = sub <16 x i8> %x, %y
78  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
79  ret <16 x i8> %b
80}
81
; Returns %x * %y in the lanes where %z is zero and %z in the remaining lanes.
; The expected output is vpt.i32 followed by a predicated vmult.i32 writing q0.
82define arm_aapcs_vfpcc <4 x i32> @mul_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
83; CHECK-LABEL: mul_v4i32:
84; CHECK:       @ %bb.0: @ %entry
85; CHECK-NEXT:    vpt.i32 eq, q0, zr
86; CHECK-NEXT:    vmult.i32 q0, q1, q2
87; CHECK-NEXT:    bx lr
88entry:
89  %c = icmp eq <4 x i32> %z, zeroinitializer
90  %a = mul <4 x i32> %x, %y
91  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
92  ret <4 x i32> %b
93}
94
; Returns %x * %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vmult.i16.
95define arm_aapcs_vfpcc <8 x i16> @mul_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
96; CHECK-LABEL: mul_v8i16:
97; CHECK:       @ %bb.0: @ %entry
98; CHECK-NEXT:    vpt.i16 eq, q0, zr
99; CHECK-NEXT:    vmult.i16 q0, q1, q2
100; CHECK-NEXT:    bx lr
101entry:
102  %c = icmp eq <8 x i16> %z, zeroinitializer
103  %a = mul <8 x i16> %x, %y
104  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
105  ret <8 x i16> %b
106}
107
; Returns %x * %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vmult.i8.
108define arm_aapcs_vfpcc <16 x i8> @mul_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
109; CHECK-LABEL: mul_v16i8:
110; CHECK:       @ %bb.0: @ %entry
111; CHECK-NEXT:    vpt.i8 eq, q0, zr
112; CHECK-NEXT:    vmult.i8 q0, q1, q2
113; CHECK-NEXT:    bx lr
114entry:
115  %c = icmp eq <16 x i8> %z, zeroinitializer
116  %a = mul <16 x i8> %x, %y
117  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
118  ret <16 x i8> %b
119}
120
; Returns %x & %y in the lanes where %z is zero and %z in the remaining lanes.
; The expected output is vpt.i32 followed by a predicated vandt writing q0.
121define arm_aapcs_vfpcc <4 x i32> @and_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
122; CHECK-LABEL: and_v4i32:
123; CHECK:       @ %bb.0: @ %entry
124; CHECK-NEXT:    vpt.i32 eq, q0, zr
125; CHECK-NEXT:    vandt q0, q1, q2
126; CHECK-NEXT:    bx lr
127entry:
128  %c = icmp eq <4 x i32> %z, zeroinitializer
129  %a = and <4 x i32> %x, %y
130  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
131  ret <4 x i32> %b
132}
133
; Returns %x & %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vandt.
134define arm_aapcs_vfpcc <8 x i16> @and_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
135; CHECK-LABEL: and_v8i16:
136; CHECK:       @ %bb.0: @ %entry
137; CHECK-NEXT:    vpt.i16 eq, q0, zr
138; CHECK-NEXT:    vandt q0, q1, q2
139; CHECK-NEXT:    bx lr
140entry:
141  %c = icmp eq <8 x i16> %z, zeroinitializer
142  %a = and <8 x i16> %x, %y
143  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
144  ret <8 x i16> %b
145}
146
; Returns %x & %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vandt.
147define arm_aapcs_vfpcc <16 x i8> @and_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
148; CHECK-LABEL: and_v16i8:
149; CHECK:       @ %bb.0: @ %entry
150; CHECK-NEXT:    vpt.i8 eq, q0, zr
151; CHECK-NEXT:    vandt q0, q1, q2
152; CHECK-NEXT:    bx lr
153entry:
154  %c = icmp eq <16 x i8> %z, zeroinitializer
155  %a = and <16 x i8> %x, %y
156  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
157  ret <16 x i8> %b
158}
159
; Returns %x | %y in the lanes where %z is zero and %z in the remaining lanes.
; The expected output is vpt.i32 followed by a predicated vorrt writing q0.
160define arm_aapcs_vfpcc <4 x i32> @or_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
161; CHECK-LABEL: or_v4i32:
162; CHECK:       @ %bb.0: @ %entry
163; CHECK-NEXT:    vpt.i32 eq, q0, zr
164; CHECK-NEXT:    vorrt q0, q1, q2
165; CHECK-NEXT:    bx lr
166entry:
167  %c = icmp eq <4 x i32> %z, zeroinitializer
168  %a = or <4 x i32> %x, %y
169  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
170  ret <4 x i32> %b
171}
172
; Returns %x | %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vorrt.
173define arm_aapcs_vfpcc <8 x i16> @or_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
174; CHECK-LABEL: or_v8i16:
175; CHECK:       @ %bb.0: @ %entry
176; CHECK-NEXT:    vpt.i16 eq, q0, zr
177; CHECK-NEXT:    vorrt q0, q1, q2
178; CHECK-NEXT:    bx lr
179entry:
180  %c = icmp eq <8 x i16> %z, zeroinitializer
181  %a = or <8 x i16> %x, %y
182  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
183  ret <8 x i16> %b
184}
185
; Returns %x | %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vorrt.
186define arm_aapcs_vfpcc <16 x i8> @or_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
187; CHECK-LABEL: or_v16i8:
188; CHECK:       @ %bb.0: @ %entry
189; CHECK-NEXT:    vpt.i8 eq, q0, zr
190; CHECK-NEXT:    vorrt q0, q1, q2
191; CHECK-NEXT:    bx lr
192entry:
193  %c = icmp eq <16 x i8> %z, zeroinitializer
194  %a = or <16 x i8> %x, %y
195  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
196  ret <16 x i8> %b
197}
198
; Returns %x ^ %y in the lanes where %z is zero and %z in the remaining lanes.
; The expected output is vpt.i32 followed by a predicated veort writing q0.
199define arm_aapcs_vfpcc <4 x i32> @xor_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
200; CHECK-LABEL: xor_v4i32:
201; CHECK:       @ %bb.0: @ %entry
202; CHECK-NEXT:    vpt.i32 eq, q0, zr
203; CHECK-NEXT:    veort q0, q1, q2
204; CHECK-NEXT:    bx lr
205entry:
206  %c = icmp eq <4 x i32> %z, zeroinitializer
207  %a = xor <4 x i32> %x, %y
208  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
209  ret <4 x i32> %b
210}
211
; Returns %x ^ %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated veort.
212define arm_aapcs_vfpcc <8 x i16> @xor_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
213; CHECK-LABEL: xor_v8i16:
214; CHECK:       @ %bb.0: @ %entry
215; CHECK-NEXT:    vpt.i16 eq, q0, zr
216; CHECK-NEXT:    veort q0, q1, q2
217; CHECK-NEXT:    bx lr
218entry:
219  %c = icmp eq <8 x i16> %z, zeroinitializer
220  %a = xor <8 x i16> %x, %y
221  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
222  ret <8 x i16> %b
223}
224
; Returns %x ^ %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated veort.
225define arm_aapcs_vfpcc <16 x i8> @xor_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
226; CHECK-LABEL: xor_v16i8:
227; CHECK:       @ %bb.0: @ %entry
228; CHECK-NEXT:    vpt.i8 eq, q0, zr
229; CHECK-NEXT:    veort q0, q1, q2
230; CHECK-NEXT:    bx lr
231entry:
232  %c = icmp eq <16 x i8> %z, zeroinitializer
233  %a = xor <16 x i8> %x, %y
234  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
235  ret <16 x i8> %b
236}
237
; Returns %x << %y (per-lane variable shift) in the lanes where %z is zero and
; %z in the remaining lanes. The expected output is vpt.i32 followed by a
; predicated vshlt.u32 writing q0.
238define arm_aapcs_vfpcc <4 x i32> @shl_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
239; CHECK-LABEL: shl_v4i32:
240; CHECK:       @ %bb.0: @ %entry
241; CHECK-NEXT:    vpt.i32 eq, q0, zr
242; CHECK-NEXT:    vshlt.u32 q0, q1, q2
243; CHECK-NEXT:    bx lr
244entry:
245  %c = icmp eq <4 x i32> %z, zeroinitializer
246  %a = shl <4 x i32> %x, %y
247  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
248  ret <4 x i32> %b
249}
250
; Returns %x << %y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vshlt.u16.
251define arm_aapcs_vfpcc <8 x i16> @shl_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
252; CHECK-LABEL: shl_v8i16:
253; CHECK:       @ %bb.0: @ %entry
254; CHECK-NEXT:    vpt.i16 eq, q0, zr
255; CHECK-NEXT:    vshlt.u16 q0, q1, q2
256; CHECK-NEXT:    bx lr
257entry:
258  %c = icmp eq <8 x i16> %z, zeroinitializer
259  %a = shl <8 x i16> %x, %y
260  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
261  ret <8 x i16> %b
262}
263
; Returns %x << %y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vshlt.u8.
264define arm_aapcs_vfpcc <16 x i8> @shl_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
265; CHECK-LABEL: shl_v16i8:
266; CHECK:       @ %bb.0: @ %entry
267; CHECK-NEXT:    vpt.i8 eq, q0, zr
268; CHECK-NEXT:    vshlt.u8 q0, q1, q2
269; CHECK-NEXT:    bx lr
270entry:
271  %c = icmp eq <16 x i8> %z, zeroinitializer
272  %a = shl <16 x i8> %x, %y
273  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
274  ret <16 x i8> %b
275}
276
; Returns the arithmetic right shift of %x by %y in the lanes where %z is zero
; and %z in the remaining lanes. The expected output negates the shift amounts
; with an unpredicated vneg.s32 before the VPT block, then emits the shift as a
; predicated vshlt.s32.
277define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
278; CHECK-LABEL: ashr_v4i32:
279; CHECK:       @ %bb.0: @ %entry
280; CHECK-NEXT:    vneg.s32 q2, q2
281; CHECK-NEXT:    vpt.i32 eq, q0, zr
282; CHECK-NEXT:    vshlt.s32 q0, q1, q2
283; CHECK-NEXT:    bx lr
284entry:
285  %c = icmp eq <4 x i32> %z, zeroinitializer
286  %a = ashr <4 x i32> %x, %y
287  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
288  ret <4 x i32> %b
289}
290
; Returns the arithmetic right shift of %x by %y in the lanes where %z is zero
; and %z in the remaining lanes (8 x i16 variant). The expected output is an
; unpredicated vneg.s16 of the shift amounts, then vpt.i16 and a predicated
; vshlt.s16.
291define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
292; CHECK-LABEL: ashr_v8i16:
293; CHECK:       @ %bb.0: @ %entry
294; CHECK-NEXT:    vneg.s16 q2, q2
295; CHECK-NEXT:    vpt.i16 eq, q0, zr
296; CHECK-NEXT:    vshlt.s16 q0, q1, q2
297; CHECK-NEXT:    bx lr
298entry:
299  %c = icmp eq <8 x i16> %z, zeroinitializer
300  %a = ashr <8 x i16> %x, %y
301  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
302  ret <8 x i16> %b
303}
304
; Returns the arithmetic right shift of %x by %y in the lanes where %z is zero
; and %z in the remaining lanes (16 x i8 variant). The expected output is an
; unpredicated vneg.s8 of the shift amounts, then vpt.i8 and a predicated
; vshlt.s8.
305define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
306; CHECK-LABEL: ashr_v16i8:
307; CHECK:       @ %bb.0: @ %entry
308; CHECK-NEXT:    vneg.s8 q2, q2
309; CHECK-NEXT:    vpt.i8 eq, q0, zr
310; CHECK-NEXT:    vshlt.s8 q0, q1, q2
311; CHECK-NEXT:    bx lr
312entry:
313  %c = icmp eq <16 x i8> %z, zeroinitializer
314  %a = ashr <16 x i8> %x, %y
315  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
316  ret <16 x i8> %b
317}
318
; Returns the logical right shift of %x by %y in the lanes where %z is zero and
; %z in the remaining lanes. The expected output negates the shift amounts with
; an unpredicated vneg.s32 before the VPT block, then emits the shift as a
; predicated vshlt.u32.
319define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
320; CHECK-LABEL: lshr_v4i32:
321; CHECK:       @ %bb.0: @ %entry
322; CHECK-NEXT:    vneg.s32 q2, q2
323; CHECK-NEXT:    vpt.i32 eq, q0, zr
324; CHECK-NEXT:    vshlt.u32 q0, q1, q2
325; CHECK-NEXT:    bx lr
326entry:
327  %c = icmp eq <4 x i32> %z, zeroinitializer
328  %a = lshr <4 x i32> %x, %y
329  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
330  ret <4 x i32> %b
331}
332
; Returns the logical right shift of %x by %y in the lanes where %z is zero and
; %z in the remaining lanes (8 x i16 variant). The expected output is an
; unpredicated vneg.s16 of the shift amounts, then vpt.i16 and a predicated
; vshlt.u16.
333define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
334; CHECK-LABEL: lshr_v8i16:
335; CHECK:       @ %bb.0: @ %entry
336; CHECK-NEXT:    vneg.s16 q2, q2
337; CHECK-NEXT:    vpt.i16 eq, q0, zr
338; CHECK-NEXT:    vshlt.u16 q0, q1, q2
339; CHECK-NEXT:    bx lr
340entry:
341  %c = icmp eq <8 x i16> %z, zeroinitializer
342  %a = lshr <8 x i16> %x, %y
343  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
344  ret <8 x i16> %b
345}
346
; Returns the logical right shift of %x by %y in the lanes where %z is zero and
; %z in the remaining lanes (16 x i8 variant). The expected output is an
; unpredicated vneg.s8 of the shift amounts, then vpt.i8 and a predicated
; vshlt.u8.
347define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
348; CHECK-LABEL: lshr_v16i8:
349; CHECK:       @ %bb.0: @ %entry
350; CHECK-NEXT:    vneg.s8 q2, q2
351; CHECK-NEXT:    vpt.i8 eq, q0, zr
352; CHECK-NEXT:    vshlt.u8 q0, q1, q2
353; CHECK-NEXT:    bx lr
354entry:
355  %c = icmp eq <16 x i8> %z, zeroinitializer
356  %a = lshr <16 x i8> %x, %y
357  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
358  ret <16 x i8> %b
359}
360
; Returns %x & ~%y in the lanes where %z is zero and %z in the remaining lanes;
; the complement is written as an xor with all-ones. The expected output folds
; the xor/and pair and the select into vpt.i32 plus a single predicated vbict.
361define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
362; CHECK-LABEL: andnot_v4i32:
363; CHECK:       @ %bb.0: @ %entry
364; CHECK-NEXT:    vpt.i32 eq, q0, zr
365; CHECK-NEXT:    vbict q0, q1, q2
366; CHECK-NEXT:    bx lr
367entry:
368  %c = icmp eq <4 x i32> %z, zeroinitializer
369  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
370  %a = and <4 x i32> %x, %y1
371  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
372  ret <4 x i32> %b
373}
374
; Returns %x & ~%y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant; the complement is an xor with all-ones). The expected
; output is vpt.i16 plus a single predicated vbict.
375define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
376; CHECK-LABEL: andnot_v8i16:
377; CHECK:       @ %bb.0: @ %entry
378; CHECK-NEXT:    vpt.i16 eq, q0, zr
379; CHECK-NEXT:    vbict q0, q1, q2
380; CHECK-NEXT:    bx lr
381entry:
382  %c = icmp eq <8 x i16> %z, zeroinitializer
383  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
384  %a = and <8 x i16> %x, %y1
385  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
386  ret <8 x i16> %b
387}
388
; Returns %x & ~%y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant; the complement is an xor with all-ones). The expected
; output is vpt.i8 plus a single predicated vbict.
389define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
390; CHECK-LABEL: andnot_v16i8:
391; CHECK:       @ %bb.0: @ %entry
392; CHECK-NEXT:    vpt.i8 eq, q0, zr
393; CHECK-NEXT:    vbict q0, q1, q2
394; CHECK-NEXT:    bx lr
395entry:
396  %c = icmp eq <16 x i8> %z, zeroinitializer
397  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
398  %a = and <16 x i8> %x, %y1
399  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
400  ret <16 x i8> %b
401}
402
; Returns %x | ~%y in the lanes where %z is zero and %z in the remaining lanes;
; the complement is written as an xor with all-ones. The expected output folds
; the xor/or pair and the select into vpt.i32 plus a single predicated vornt.
403define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
404; CHECK-LABEL: ornot_v4i32:
405; CHECK:       @ %bb.0: @ %entry
406; CHECK-NEXT:    vpt.i32 eq, q0, zr
407; CHECK-NEXT:    vornt q0, q1, q2
408; CHECK-NEXT:    bx lr
409entry:
410  %c = icmp eq <4 x i32> %z, zeroinitializer
411  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
412  %a = or <4 x i32> %x, %y1
413  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
414  ret <4 x i32> %b
415}
416
; Returns %x | ~%y in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant; the complement is an xor with all-ones). The expected
; output is vpt.i16 plus a single predicated vornt.
417define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
418; CHECK-LABEL: ornot_v8i16:
419; CHECK:       @ %bb.0: @ %entry
420; CHECK-NEXT:    vpt.i16 eq, q0, zr
421; CHECK-NEXT:    vornt q0, q1, q2
422; CHECK-NEXT:    bx lr
423entry:
424  %c = icmp eq <8 x i16> %z, zeroinitializer
425  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
426  %a = or <8 x i16> %x, %y1
427  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
428  ret <8 x i16> %b
429}
430
; Returns %x | ~%y in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant; the complement is an xor with all-ones). The expected
; output is vpt.i8 plus a single predicated vornt.
431define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
432; CHECK-LABEL: ornot_v16i8:
433; CHECK:       @ %bb.0: @ %entry
434; CHECK-NEXT:    vpt.i8 eq, q0, zr
435; CHECK-NEXT:    vornt q0, q1, q2
436; CHECK-NEXT:    bx lr
437entry:
438  %c = icmp eq <16 x i8> %z, zeroinitializer
439  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
440  %a = or <16 x i8> %x, %y1
441  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
442  ret <16 x i8> %b
443}
444
; Returns %x + %y (fadd) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes. The expected output is vpt.f32 followed by a
; predicated vaddt.f32 writing q0.
445define arm_aapcs_vfpcc <4 x float> @fadd_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
446; CHECK-LABEL: fadd_v4f32:
447; CHECK:       @ %bb.0: @ %entry
448; CHECK-NEXT:    vpt.f32 eq, q0, zr
449; CHECK-NEXT:    vaddt.f32 q0, q1, q2
450; CHECK-NEXT:    bx lr
451entry:
452  %c = fcmp oeq <4 x float> %z, zeroinitializer
453  %a = fadd <4 x float> %x, %y
454  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
455  ret <4 x float> %b
456}
457
; Returns %x + %y (fadd) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes (8 x half variant). The expected output is
; vpt.f16 plus a predicated vaddt.f16.
458define arm_aapcs_vfpcc <8 x half> @fadd_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
459; CHECK-LABEL: fadd_v8f16:
460; CHECK:       @ %bb.0: @ %entry
461; CHECK-NEXT:    vpt.f16 eq, q0, zr
462; CHECK-NEXT:    vaddt.f16 q0, q1, q2
463; CHECK-NEXT:    bx lr
464entry:
465  %c = fcmp oeq <8 x half> %z, zeroinitializer
466  %a = fadd <8 x half> %x, %y
467  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
468  ret <8 x half> %b
469}
470
; Returns %x - %y (fsub) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes. The expected output is vpt.f32 followed by a
; predicated vsubt.f32 writing q0.
471define arm_aapcs_vfpcc <4 x float> @fsub_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
472; CHECK-LABEL: fsub_v4f32:
473; CHECK:       @ %bb.0: @ %entry
474; CHECK-NEXT:    vpt.f32 eq, q0, zr
475; CHECK-NEXT:    vsubt.f32 q0, q1, q2
476; CHECK-NEXT:    bx lr
477entry:
478  %c = fcmp oeq <4 x float> %z, zeroinitializer
479  %a = fsub <4 x float> %x, %y
480  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
481  ret <4 x float> %b
482}
483
; Returns %x - %y (fsub) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes (8 x half variant). The expected output is
; vpt.f16 plus a predicated vsubt.f16.
484define arm_aapcs_vfpcc <8 x half> @fsub_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
485; CHECK-LABEL: fsub_v8f16:
486; CHECK:       @ %bb.0: @ %entry
487; CHECK-NEXT:    vpt.f16 eq, q0, zr
488; CHECK-NEXT:    vsubt.f16 q0, q1, q2
489; CHECK-NEXT:    bx lr
490entry:
491  %c = fcmp oeq <8 x half> %z, zeroinitializer
492  %a = fsub <8 x half> %x, %y
493  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
494  ret <8 x half> %b
495}
496
; Returns %x * %y (fmul) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes. The expected output is vpt.f32 followed by a
; predicated vmult.f32 writing q0.
497define arm_aapcs_vfpcc <4 x float> @fmul_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
498; CHECK-LABEL: fmul_v4f32:
499; CHECK:       @ %bb.0: @ %entry
500; CHECK-NEXT:    vpt.f32 eq, q0, zr
501; CHECK-NEXT:    vmult.f32 q0, q1, q2
502; CHECK-NEXT:    bx lr
503entry:
504  %c = fcmp oeq <4 x float> %z, zeroinitializer
505  %a = fmul <4 x float> %x, %y
506  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
507  ret <4 x float> %b
508}
509
; Returns %x * %y (fmul) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes (8 x half variant). The expected output is
; vpt.f16 plus a predicated vmult.f16.
510define arm_aapcs_vfpcc <8 x half> @fmul_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
511; CHECK-LABEL: fmul_v8f16:
512; CHECK:       @ %bb.0: @ %entry
513; CHECK-NEXT:    vpt.f16 eq, q0, zr
514; CHECK-NEXT:    vmult.f16 q0, q1, q2
515; CHECK-NEXT:    bx lr
516entry:
517  %c = fcmp oeq <8 x half> %z, zeroinitializer
518  %a = fmul <8 x half> %x, %y
519  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
520  ret <8 x half> %b
521}
522
; Returns %x / %y (fdiv) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes. Unlike the other operations in this file, the
; expected output has no predicated instruction: the division is done with four
; scalar vdiv.f32 instructions on s-registers, and the result is merged with an
; unpredicated vcmp.f32 followed by vpsel.
523define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
524; CHECK-LABEL: fdiv_v4f32:
525; CHECK:       @ %bb.0: @ %entry
526; CHECK-NEXT:    vdiv.f32 s7, s7, s11
527; CHECK-NEXT:    vcmp.f32 eq, q0, zr
528; CHECK-NEXT:    vdiv.f32 s6, s6, s10
529; CHECK-NEXT:    vdiv.f32 s5, s5, s9
530; CHECK-NEXT:    vdiv.f32 s4, s4, s8
531; CHECK-NEXT:    vpsel q0, q1, q0
532; CHECK-NEXT:    bx lr
533entry:
534  %c = fcmp oeq <4 x float> %z, zeroinitializer
535  %a = fdiv <4 x float> %x, %y
536  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
537  ret <4 x float> %b
538}
539
; Returns %x / %y (fdiv) in the lanes where %z compares ordered-equal to zero
; and %z in the remaining lanes (8 x half variant). The expected output has no
; predicated instruction: it is a sequence of eight scalar vdiv.f16
; instructions interleaved with vmovx.f16 / vins.f16 moves, and the result is
; merged with an unpredicated vcmp.f16 followed by vpsel.
540define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
541; CHECK-LABEL: fdiv_v8f16:
542; CHECK:       @ %bb.0: @ %entry
543; CHECK-NEXT:    vmovx.f16 s14, s4
544; CHECK-NEXT:    vmovx.f16 s12, s8
545; CHECK-NEXT:    vdiv.f16 s4, s4, s8
546; CHECK-NEXT:    vdiv.f16 s12, s14, s12
547; CHECK-NEXT:    vins.f16 s4, s12
548; CHECK-NEXT:    vmovx.f16 s12, s5
549; CHECK-NEXT:    vmovx.f16 s8, s9
550; CHECK-NEXT:    vdiv.f16 s5, s5, s9
551; CHECK-NEXT:    vdiv.f16 s8, s12, s8
552; CHECK-NEXT:    vmovx.f16 s12, s6
553; CHECK-NEXT:    vins.f16 s5, s8
554; CHECK-NEXT:    vmovx.f16 s8, s10
555; CHECK-NEXT:    vdiv.f16 s6, s6, s10
556; CHECK-NEXT:    vdiv.f16 s8, s12, s8
557; CHECK-NEXT:    vins.f16 s6, s8
558; CHECK-NEXT:    vmovx.f16 s10, s7
559; CHECK-NEXT:    vmovx.f16 s8, s11
560; CHECK-NEXT:    vdiv.f16 s7, s7, s11
561; CHECK-NEXT:    vdiv.f16 s8, s10, s8
562; CHECK-NEXT:    vcmp.f16 eq, q0, zr
563; CHECK-NEXT:    vins.f16 s7, s8
564; CHECK-NEXT:    vpsel q0, q1, q0
565; CHECK-NEXT:    bx lr
566entry:
567  %c = fcmp oeq <8 x half> %z, zeroinitializer
568  %a = fdiv <8 x half> %x, %y
569  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
570  ret <8 x half> %b
571}
572
; Builds the signed minimum of %x and %y as an icmp slt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes. The
; expected output is vpt.i32 followed by a single predicated vmint.s32.
573define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
574; CHECK-LABEL: icmp_slt_v4i32:
575; CHECK:       @ %bb.0: @ %entry
576; CHECK-NEXT:    vpt.i32 eq, q0, zr
577; CHECK-NEXT:    vmint.s32 q0, q1, q2
578; CHECK-NEXT:    bx lr
579entry:
580  %c = icmp eq <4 x i32> %z, zeroinitializer
581  %a1 = icmp slt <4 x i32> %x, %y
582  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
583  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
584  ret <4 x i32> %b
585}
586
; Builds the signed minimum of %x and %y as an icmp slt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vmint.s16.
587define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
588; CHECK-LABEL: icmp_slt_v8i16:
589; CHECK:       @ %bb.0: @ %entry
590; CHECK-NEXT:    vpt.i16 eq, q0, zr
591; CHECK-NEXT:    vmint.s16 q0, q1, q2
592; CHECK-NEXT:    bx lr
593entry:
594  %c = icmp eq <8 x i16> %z, zeroinitializer
595  %a1 = icmp slt <8 x i16> %x, %y
596  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
597  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
598  ret <8 x i16> %b
599}
600
; Builds the signed minimum of %x and %y as an icmp slt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vmint.s8.
601define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
602; CHECK-LABEL: icmp_slt_v16i8:
603; CHECK:       @ %bb.0: @ %entry
604; CHECK-NEXT:    vpt.i8 eq, q0, zr
605; CHECK-NEXT:    vmint.s8 q0, q1, q2
606; CHECK-NEXT:    bx lr
607entry:
608  %c = icmp eq <16 x i8> %z, zeroinitializer
609  %a1 = icmp slt <16 x i8> %x, %y
610  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
611  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
612  ret <16 x i8> %b
613}
614
; Builds the signed maximum of %x and %y as an icmp sgt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes. The
; expected output is vpt.i32 followed by a single predicated vmaxt.s32.
615define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
616; CHECK-LABEL: icmp_sgt_v4i32:
617; CHECK:       @ %bb.0: @ %entry
618; CHECK-NEXT:    vpt.i32 eq, q0, zr
619; CHECK-NEXT:    vmaxt.s32 q0, q1, q2
620; CHECK-NEXT:    bx lr
621entry:
622  %c = icmp eq <4 x i32> %z, zeroinitializer
623  %a1 = icmp sgt <4 x i32> %x, %y
624  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
625  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
626  ret <4 x i32> %b
627}
628
; Builds the signed maximum of %x and %y as an icmp sgt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vmaxt.s16.
629define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
630; CHECK-LABEL: icmp_sgt_v8i16:
631; CHECK:       @ %bb.0: @ %entry
632; CHECK-NEXT:    vpt.i16 eq, q0, zr
633; CHECK-NEXT:    vmaxt.s16 q0, q1, q2
634; CHECK-NEXT:    bx lr
635entry:
636  %c = icmp eq <8 x i16> %z, zeroinitializer
637  %a1 = icmp sgt <8 x i16> %x, %y
638  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
639  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
640  ret <8 x i16> %b
641}
642
; Builds the signed maximum of %x and %y as an icmp sgt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vmaxt.s8.
643define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
644; CHECK-LABEL: icmp_sgt_v16i8:
645; CHECK:       @ %bb.0: @ %entry
646; CHECK-NEXT:    vpt.i8 eq, q0, zr
647; CHECK-NEXT:    vmaxt.s8 q0, q1, q2
648; CHECK-NEXT:    bx lr
649entry:
650  %c = icmp eq <16 x i8> %z, zeroinitializer
651  %a1 = icmp sgt <16 x i8> %x, %y
652  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
653  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
654  ret <16 x i8> %b
655}
656
; Builds the unsigned minimum of %x and %y as an icmp ult + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes. The
; expected output is vpt.i32 followed by a single predicated vmint.u32.
657define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
658; CHECK-LABEL: icmp_ult_v4i32:
659; CHECK:       @ %bb.0: @ %entry
660; CHECK-NEXT:    vpt.i32 eq, q0, zr
661; CHECK-NEXT:    vmint.u32 q0, q1, q2
662; CHECK-NEXT:    bx lr
663entry:
664  %c = icmp eq <4 x i32> %z, zeroinitializer
665  %a1 = icmp ult <4 x i32> %x, %y
666  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
667  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
668  ret <4 x i32> %b
669}
670
; Builds the unsigned minimum of %x and %y as an icmp ult + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vmint.u16.
671define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
672; CHECK-LABEL: icmp_ult_v8i16:
673; CHECK:       @ %bb.0: @ %entry
674; CHECK-NEXT:    vpt.i16 eq, q0, zr
675; CHECK-NEXT:    vmint.u16 q0, q1, q2
676; CHECK-NEXT:    bx lr
677entry:
678  %c = icmp eq <8 x i16> %z, zeroinitializer
679  %a1 = icmp ult <8 x i16> %x, %y
680  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
681  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
682  ret <8 x i16> %b
683}
684
; Builds the unsigned minimum of %x and %y as an icmp ult + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vmint.u8.
685define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
686; CHECK-LABEL: icmp_ult_v16i8:
687; CHECK:       @ %bb.0: @ %entry
688; CHECK-NEXT:    vpt.i8 eq, q0, zr
689; CHECK-NEXT:    vmint.u8 q0, q1, q2
690; CHECK-NEXT:    bx lr
691entry:
692  %c = icmp eq <16 x i8> %z, zeroinitializer
693  %a1 = icmp ult <16 x i8> %x, %y
694  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
695  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
696  ret <16 x i8> %b
697}
698
; Builds the unsigned maximum of %x and %y as an icmp ugt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes. The
; expected output is vpt.i32 followed by a single predicated vmaxt.u32.
699define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
700; CHECK-LABEL: icmp_ugt_v4i32:
701; CHECK:       @ %bb.0: @ %entry
702; CHECK-NEXT:    vpt.i32 eq, q0, zr
703; CHECK-NEXT:    vmaxt.u32 q0, q1, q2
704; CHECK-NEXT:    bx lr
705entry:
706  %c = icmp eq <4 x i32> %z, zeroinitializer
707  %a1 = icmp ugt <4 x i32> %x, %y
708  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
709  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
710  ret <4 x i32> %b
711}
712
; Builds the unsigned maximum of %x and %y as an icmp ugt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (8 x i16 variant). The expected output is vpt.i16 plus a predicated vmaxt.u16.
713define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
714; CHECK-LABEL: icmp_ugt_v8i16:
715; CHECK:       @ %bb.0: @ %entry
716; CHECK-NEXT:    vpt.i16 eq, q0, zr
717; CHECK-NEXT:    vmaxt.u16 q0, q1, q2
718; CHECK-NEXT:    bx lr
719entry:
720  %c = icmp eq <8 x i16> %z, zeroinitializer
721  %a1 = icmp ugt <8 x i16> %x, %y
722  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
723  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
724  ret <8 x i16> %b
725}
726
; Builds the unsigned maximum of %x and %y as an icmp ugt + select pair, then
; returns it in the lanes where %z is zero and %z in the remaining lanes
; (16 x i8 variant). The expected output is vpt.i8 plus a predicated vmaxt.u8.
727define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
728; CHECK-LABEL: icmp_ugt_v16i8:
729; CHECK:       @ %bb.0: @ %entry
730; CHECK-NEXT:    vpt.i8 eq, q0, zr
731; CHECK-NEXT:    vmaxt.u8 q0, q1, q2
732; CHECK-NEXT:    bx lr
733entry:
734  %c = icmp eq <16 x i8> %z, zeroinitializer
735  %a1 = icmp ugt <16 x i8> %x, %y
736  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
737  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
738  ret <16 x i8> %b
739}
740
; Picks %x where "fcmp fast olt %x, %y" holds and %y otherwise, then returns
; that value in the lanes where %z compares ordered-equal to zero and %z in the
; remaining lanes. The expected output is vpt.f32 followed by a single
; predicated vminnmt.f32.
741define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
742; CHECK-LABEL: fcmp_fast_olt_v4f32:
743; CHECK:       @ %bb.0: @ %entry
744; CHECK-NEXT:    vpt.f32 eq, q0, zr
745; CHECK-NEXT:    vminnmt.f32 q0, q1, q2
746; CHECK-NEXT:    bx lr
747entry:
748  %c = fcmp oeq <4 x float> %z, zeroinitializer
749  %a1 = fcmp fast olt <4 x float> %x, %y
750  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
751  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
752  ret <4 x float> %b
753}
754
; Picks %x where "fcmp fast olt %x, %y" holds and %y otherwise, then returns
; that value in the lanes where %z compares ordered-equal to zero and %z in the
; remaining lanes (8 x half variant). The expected output is vpt.f16 plus a
; predicated vminnmt.f16.
755define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
756; CHECK-LABEL: fcmp_fast_olt_v8f16:
757; CHECK:       @ %bb.0: @ %entry
758; CHECK-NEXT:    vpt.f16 eq, q0, zr
759; CHECK-NEXT:    vminnmt.f16 q0, q1, q2
760; CHECK-NEXT:    bx lr
761entry:
762  %c = fcmp oeq <8 x half> %z, zeroinitializer
763  %a1 = fcmp fast olt <8 x half> %x, %y
764  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
765  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
766  ret <8 x half> %b
767}
768
; Picks %x where "fcmp fast ogt %x, %y" holds and %y otherwise, then returns
; that value in the lanes where %z compares ordered-equal to zero and %z in the
; remaining lanes. The expected output is vpt.f32 followed by a single
; predicated vmaxnmt.f32.
769define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
770; CHECK-LABEL: fcmp_fast_ogt_v4f32:
771; CHECK:       @ %bb.0: @ %entry
772; CHECK-NEXT:    vpt.f32 eq, q0, zr
773; CHECK-NEXT:    vmaxnmt.f32 q0, q1, q2
774; CHECK-NEXT:    bx lr
775entry:
776  %c = fcmp oeq <4 x float> %z, zeroinitializer
777  %a1 = fcmp fast ogt <4 x float> %x, %y
778  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
779  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
780  ret <4 x float> %b
781}
782
; Picks %x where "fcmp fast ogt %x, %y" holds and %y otherwise, then returns
; that value in the lanes where %z compares ordered-equal to zero and %z in the
; remaining lanes (8 x half variant). The expected output is vpt.f16 plus a
; predicated vmaxnmt.f16.
783define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
784; CHECK-LABEL: fcmp_fast_ogt_v8f16:
785; CHECK:       @ %bb.0: @ %entry
786; CHECK-NEXT:    vpt.f16 eq, q0, zr
787; CHECK-NEXT:    vmaxnmt.f16 q0, q1, q2
788; CHECK-NEXT:    bx lr
789entry:
790  %c = fcmp oeq <8 x half> %z, zeroinitializer
791  %a1 = fcmp fast ogt <8 x half> %x, %y
792  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
793  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
794  ret <8 x half> %b
795}
796
; Returns @llvm.sadd.sat.v4i32(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i32 followed by a predicated
; vqaddt.s32 writing q0.
797define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
798; CHECK-LABEL: sadd_sat_v4i32:
799; CHECK:       @ %bb.0: @ %entry
800; CHECK-NEXT:    vpt.i32 eq, q0, zr
801; CHECK-NEXT:    vqaddt.s32 q0, q1, q2
802; CHECK-NEXT:    bx lr
803entry:
804  %c = icmp eq <4 x i32> %z, zeroinitializer
805  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
806  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
807  ret <4 x i32> %b
808}
809
; Returns @llvm.sadd.sat.v8i16(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i16 plus a predicated
; vqaddt.s16.
810define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
811; CHECK-LABEL: sadd_sat_v8i16:
812; CHECK:       @ %bb.0: @ %entry
813; CHECK-NEXT:    vpt.i16 eq, q0, zr
814; CHECK-NEXT:    vqaddt.s16 q0, q1, q2
815; CHECK-NEXT:    bx lr
816entry:
817  %c = icmp eq <8 x i16> %z, zeroinitializer
818  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
819  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
820  ret <8 x i16> %b
821}
822
; Returns @llvm.sadd.sat.v16i8(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i8 plus a predicated
; vqaddt.s8.
823define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
824; CHECK-LABEL: sadd_sat_v16i8:
825; CHECK:       @ %bb.0: @ %entry
826; CHECK-NEXT:    vpt.i8 eq, q0, zr
827; CHECK-NEXT:    vqaddt.s8 q0, q1, q2
828; CHECK-NEXT:    bx lr
829entry:
830  %c = icmp eq <16 x i8> %z, zeroinitializer
831  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
832  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
833  ret <16 x i8> %b
834}
835
; Returns @llvm.uadd.sat.v4i32(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i32 followed by a predicated
; vqaddt.u32 writing q0.
836define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
837; CHECK-LABEL: uadd_sat_v4i32:
838; CHECK:       @ %bb.0: @ %entry
839; CHECK-NEXT:    vpt.i32 eq, q0, zr
840; CHECK-NEXT:    vqaddt.u32 q0, q1, q2
841; CHECK-NEXT:    bx lr
842entry:
843  %c = icmp eq <4 x i32> %z, zeroinitializer
844  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
845  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
846  ret <4 x i32> %b
847}
848
; Returns @llvm.uadd.sat.v8i16(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i16 plus a predicated
; vqaddt.u16.
849define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
850; CHECK-LABEL: uadd_sat_v8i16:
851; CHECK:       @ %bb.0: @ %entry
852; CHECK-NEXT:    vpt.i16 eq, q0, zr
853; CHECK-NEXT:    vqaddt.u16 q0, q1, q2
854; CHECK-NEXT:    bx lr
855entry:
856  %c = icmp eq <8 x i16> %z, zeroinitializer
857  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
858  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
859  ret <8 x i16> %b
860}
861
; Returns @llvm.uadd.sat.v16i8(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i8 plus a predicated
; vqaddt.u8.
862define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
863; CHECK-LABEL: uadd_sat_v16i8:
864; CHECK:       @ %bb.0: @ %entry
865; CHECK-NEXT:    vpt.i8 eq, q0, zr
866; CHECK-NEXT:    vqaddt.u8 q0, q1, q2
867; CHECK-NEXT:    bx lr
868entry:
869  %c = icmp eq <16 x i8> %z, zeroinitializer
870  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
871  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
872  ret <16 x i8> %b
873}
874
; Returns @llvm.ssub.sat.v4i32(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i32 followed by a predicated
; vqsubt.s32 writing q0.
875define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
876; CHECK-LABEL: ssub_sat_v4i32:
877; CHECK:       @ %bb.0: @ %entry
878; CHECK-NEXT:    vpt.i32 eq, q0, zr
879; CHECK-NEXT:    vqsubt.s32 q0, q1, q2
880; CHECK-NEXT:    bx lr
881entry:
882  %c = icmp eq <4 x i32> %z, zeroinitializer
883  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
884  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
885  ret <4 x i32> %b
886}
887
; Returns @llvm.ssub.sat.v8i16(%x, %y) in the lanes where %z is zero and %z in
; the remaining lanes. The expected output is vpt.i16 plus a predicated
; vqsubt.s16.
888define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
889; CHECK-LABEL: ssub_sat_v8i16:
890; CHECK:       @ %bb.0: @ %entry
891; CHECK-NEXT:    vpt.i16 eq, q0, zr
892; CHECK-NEXT:    vqsubt.s16 q0, q1, q2
893; CHECK-NEXT:    bx lr
894entry:
895  %c = icmp eq <8 x i16> %z, zeroinitializer
896  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
897  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
898  ret <8 x i16> %b
899}
900
; Signed saturating subtract (%x - %y) on <16 x i8>, selected only in the
; lanes where %z is zero; all other lanes return %z unchanged. The expected
; assembly is a single VPT block predicating vqsub.s8.
define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: ssub_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; Per-lane predicate: true where %z == 0.
  %z.zero = icmp eq <16 x i8> %z, zeroinitializer
  %sat = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  ; Inactive lanes fall back to %z.
  %res = select <16 x i1> %z.zero, <16 x i8> %sat, <16 x i8> %z
  ret <16 x i8> %res
}
913
; Unsigned saturating subtract (%x - %y) on <4 x i32>, selected only in the
; lanes where %z is zero; all other lanes return %z unchanged. The expected
; assembly is a single VPT block predicating vqsub.u32.
define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: usub_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; Per-lane predicate: true where %z == 0.
  %z.zero = icmp eq <4 x i32> %z, zeroinitializer
  %sat = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  ; Inactive lanes fall back to %z.
  %res = select <4 x i1> %z.zero, <4 x i32> %sat, <4 x i32> %z
  ret <4 x i32> %res
}
926
; Unsigned saturating subtract (%x - %y) on <8 x i16>, selected only in the
; lanes where %z is zero; all other lanes return %z unchanged. The expected
; assembly is a single VPT block predicating vqsub.u16.
define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: usub_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; Per-lane predicate: true where %z == 0.
  %z.zero = icmp eq <8 x i16> %z, zeroinitializer
  %sat = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  ; Inactive lanes fall back to %z.
  %res = select <8 x i1> %z.zero, <8 x i16> %sat, <8 x i16> %z
  ret <8 x i16> %res
}
939
; Unsigned saturating subtract (%x - %y) on <16 x i8>, selected only in the
; lanes where %z is zero; all other lanes return %z unchanged. The expected
; assembly is a single VPT block predicating vqsub.u8.
define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: usub_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  ; Per-lane predicate: true where %z == 0.
  %z.zero = icmp eq <16 x i8> %z, zeroinitializer
  %sat = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  ; Inactive lanes fall back to %z.
  %res = select <16 x i1> %z.zero, <16 x i8> %sat, <16 x i8> %z
  ret <16 x i8> %res
}
952
; Predicated vector-by-scalar add on <4 x i32>: the scalar %y is splatted to
; every lane and added to %x; the sum is kept where %z is zero and %z is
; returned in all other lanes. The expected assembly feeds the scalar
; straight from r0 into the predicated vadd.
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: addqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vaddt.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = add <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
967
; Predicated vector-by-scalar add on <8 x i16>: the scalar %y is splatted to
; every lane and added to %x; the sum is kept where %z is zero and %z is
; returned in all other lanes. The expected assembly feeds the scalar
; straight from r0 into the predicated vadd.
define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: addqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vaddt.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = add <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
982
; Predicated vector-by-scalar add on <16 x i8>: the scalar %y is splatted to
; every lane and added to %x; the sum is kept where %z is zero and %z is
; returned in all other lanes. The expected assembly feeds the scalar
; straight from r0 into the predicated vadd.
define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: addqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vaddt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = add <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
997
; Predicated vector-by-scalar subtract on <4 x i32>: the scalar %y is splatted
; to every lane and subtracted from %x; the difference is kept where %z is
; zero and %z is returned in all other lanes. The expected assembly feeds the
; scalar straight from r0 into the predicated vsub.
define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: subqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vsubt.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
1012
; Predicated vector-by-scalar subtract on <8 x i16>: the scalar %y is splatted
; to every lane and subtracted from %x; the difference is kept where %z is
; zero and %z is returned in all other lanes. The expected assembly feeds the
; scalar straight from r0 into the predicated vsub.
define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: subqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vsubt.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
1027
; Predicated vector-by-scalar subtract on <16 x i8>: the scalar %y is splatted
; to every lane and subtracted from %x; the difference is kept where %z is
; zero and %z is returned in all other lanes. The expected assembly feeds the
; scalar straight from r0 into the predicated vsub.
define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: subqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vsubt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = sub <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
1042
; Predicated vector-by-scalar multiply on <4 x i32>: the scalar %y is splatted
; to every lane and multiplied with %x; the product is kept where %z is zero
; and %z is returned in all other lanes. The expected assembly feeds the
; scalar straight from r0 into the predicated vmul.
define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: mulqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmult.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = mul <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
1057
; Predicated vector-by-scalar multiply on <8 x i16>: the scalar %y is splatted
; to every lane and multiplied with %x; the product is kept where %z is zero
; and %z is returned in all other lanes. The expected assembly feeds the
; scalar straight from r0 into the predicated vmul.
define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: mulqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmult.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = mul <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
1072
; Predicated vector-by-scalar multiply on <16 x i8>: the scalar %y is splatted
; to every lane and multiplied with %x; the product is kept where %z is zero
; and %z is returned in all other lanes. The expected assembly feeds the
; scalar straight from r0 into the predicated vmul.
define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: mulqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmult.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = mul <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
1087
; Predicated vector-by-scalar fadd on <4 x float>: the scalar %y is splatted
; to every lane and added to %x; the sum is kept where %z compares ordered-
; equal to zero and %z is returned in all other lanes. The expected assembly
; first copies the scalar from s8 into r0, then uses r0 as the scalar operand
; of the predicated vadd.
define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: faddqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vaddt.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x float> poison, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
  %a = fadd <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}
1103
; Predicated vector-by-scalar fadd on <8 x half>: the scalar %y is splatted
; to every lane and added to %x; the sum is kept where %z compares ordered-
; equal to zero and %z is returned in all other lanes. The expected assembly
; first copies the scalar from s8 into r0, then uses r0 as the scalar operand
; of the predicated vadd.
define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: faddqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vaddt.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x half> poison, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
  %a = fadd <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}
1119
; Predicated vector-by-scalar fsub on <4 x float>: the scalar %y is splatted
; to every lane and subtracted from %x; the difference is kept where %z
; compares ordered-equal to zero and %z is returned in all other lanes. The
; expected assembly first copies the scalar from s8 into r0, then uses r0 as
; the scalar operand of the predicated vsub.
define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: fsubqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vsubt.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x float> poison, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
  %a = fsub <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}
1135
; Predicated vector-by-scalar fsub on <8 x half>: the scalar %y is splatted
; to every lane and subtracted from %x; the difference is kept where %z
; compares ordered-equal to zero and %z is returned in all other lanes. The
; expected assembly first copies the scalar from s8 into r0, then uses r0 as
; the scalar operand of the predicated vsub.
define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: fsubqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vsubt.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x half> poison, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
  %a = fsub <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}
1151
; Predicated vector-by-scalar fmul on <4 x float>: the scalar %y is splatted
; to every lane and multiplied with %x; the product is kept where %z compares
; ordered-equal to zero and %z is returned in all other lanes. The expected
; assembly first copies the scalar from s8 into r0, then uses r0 as the
; scalar operand of the predicated vmul.
define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: fmulqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmult.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x float> poison, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
  %a = fmul <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}
1167
; Predicated vector-by-scalar fmul on <8 x half>: the scalar %y is splatted
; to every lane and multiplied with %x; the product is kept where %z compares
; ordered-equal to zero and %z is returned in all other lanes. The expected
; assembly first copies the scalar from s8 into r0, then uses r0 as the
; scalar operand of the predicated vmul.
define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: fmulqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmult.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x half> poison, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> poison, <8 x i32> zeroinitializer
  %a = fmul <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}
1183
; Predicated vector-by-scalar signed saturating add on <4 x i32>: the scalar
; %y is splatted to every lane and combined with %x via llvm.sadd.sat; the
; result is kept where %z is zero and %z is returned in all other lanes. The
; expected assembly feeds the scalar straight from r0 into the predicated
; vqadd.s32.
define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: sadd_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
1198
; Predicated vector-by-scalar signed saturating add on <8 x i16>: the scalar
; %y is splatted to every lane and combined with %x via llvm.sadd.sat; the
; result is kept where %z is zero and %z is returned in all other lanes. The
; expected assembly feeds the scalar straight from r0 into the predicated
; vqadd.s16.
define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: sadd_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
1213
; Predicated vector-by-scalar signed saturating add on <16 x i8>: the scalar
; %y is splatted to every lane and combined with %x via llvm.sadd.sat; the
; result is kept where %z is zero and %z is returned in all other lanes. The
; expected assembly feeds the scalar straight from r0 into the predicated
; vqadd.s8.
define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: sadd_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.s8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
1228
; Predicated vector-by-scalar unsigned saturating add on <4 x i32>: the scalar
; %y is splatted to every lane and combined with %x via llvm.uadd.sat; the
; result is kept where %z is zero and %z is returned in all other lanes. The
; expected assembly feeds the scalar straight from r0 into the predicated
; vqadd.u32.
define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: uadd_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.u32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
1243
; Predicated vector-by-scalar unsigned saturating add on <8 x i16>: the scalar
; %y is splatted to every lane and combined with %x via llvm.uadd.sat; the
; result is kept where %z is zero and %z is returned in all other lanes. The
; expected assembly feeds the scalar straight from r0 into the predicated
; vqadd.u16.
define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: uadd_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.u16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
1258
; Predicated vector-by-scalar unsigned saturating add on <16 x i8>: the scalar
; %y is splatted to every lane and combined with %x via llvm.uadd.sat; the
; result is kept where %z is zero and %z is returned in all other lanes. The
; expected assembly feeds the scalar straight from r0 into the predicated
; vqadd.u8.
define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: uadd_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.u8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
1273
; Predicated vector-by-scalar signed saturating subtract on <4 x i32>: the
; scalar %y is splatted to every lane and subtracted from %x via
; llvm.ssub.sat; the result is kept where %z is zero and %z is returned in
; all other lanes. The expected assembly feeds the scalar straight from r0
; into the predicated vqsub.s32.
define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: ssub_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
1288
; Predicated vector-by-scalar signed saturating subtract on <8 x i16>: the
; scalar %y is splatted to every lane and subtracted from %x via
; llvm.ssub.sat; the result is kept where %z is zero and %z is returned in
; all other lanes. The expected assembly feeds the scalar straight from r0
; into the predicated vqsub.s16.
define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: ssub_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
1303
; Predicated vector-by-scalar signed saturating subtract on <16 x i8>: the
; scalar %y is splatted to every lane and subtracted from %x via
; llvm.ssub.sat; the result is kept where %z is zero and %z is returned in
; all other lanes. The expected assembly feeds the scalar straight from r0
; into the predicated vqsub.s8.
define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: ssub_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.s8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
1318
; Predicated vector-by-scalar unsigned saturating subtract on <4 x i32>: the
; scalar %y is splatted to every lane and subtracted from %x via
; llvm.usub.sat; the result is kept where %z is zero and %z is returned in
; all other lanes. The expected assembly feeds the scalar straight from r0
; into the predicated vqsub.u32.
define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: usub_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.u32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}
1333
; Predicated vector-by-scalar unsigned saturating subtract on <8 x i16>: the
; scalar %y is splatted to every lane and subtracted from %x via
; llvm.usub.sat; the result is kept where %z is zero and %z is returned in
; all other lanes. The expected assembly feeds the scalar straight from r0
; into the predicated vqsub.u16.
define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: usub_satqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.u16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}
1348
; Predicated vector-by-scalar unsigned saturating subtract on <16 x i8>: the
; scalar %y is splatted to every lane and subtracted from %x via
; llvm.usub.sat; the result is kept where %z is zero and %z is returned in
; all other lanes. The expected assembly feeds the scalar straight from r0
; into the predicated vqsub.u8.
define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: usub_satqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.u8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  ; Splat of %y: poison (rather than the deprecated undef) is the placeholder
  ; for the lanes and operand the shuffle never reads.
  %i = insertelement <16 x i8> poison, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}
1363
; Declarations of the saturating add/subtract intrinsics called by the tests
; in this file, grouped by operation. Parameter names are omitted because
; they carry no meaning on a declaration.

; Signed saturating add.
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)

; Unsigned saturating add.
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)

; Signed saturating subtract.
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)

; Unsigned saturating subtract.
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
1376