xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-pred-selectop2.ll (revision f1961153c2017351244289e1b3164bfa9125996f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3
; <4 x i32> add merged through a select on the vctp32(%n) predicate:
; true lanes take %x + %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vaddt.i32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %sum = add <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %sum, <4 x i32> %x
  ret <4 x i32> %merged
}
17
; <8 x i16> add merged through a select on the vctp16(%n) predicate:
; true lanes take %x + %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vaddt.i16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %sum = add <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %sum, <8 x i16> %x
  ret <8 x i16> %merged
}
31
; <16 x i8> add merged through a select on the vctp8(%n) predicate:
; true lanes take %x + %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vaddt.i8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %sum = add <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %sum, <16 x i8> %x
  ret <16 x i8> %merged
}
45
; <4 x i32> subtract merged through a select on the vctp32(%n) predicate:
; true lanes take %x - %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vsubt.i32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %diff = sub <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %diff, <4 x i32> %x
  ret <4 x i32> %merged
}
59
; <8 x i16> subtract merged through a select on the vctp16(%n) predicate:
; true lanes take %x - %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vsubt.i16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %diff = sub <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %diff, <8 x i16> %x
  ret <8 x i16> %merged
}
73
; <16 x i8> subtract merged through a select on the vctp8(%n) predicate:
; true lanes take %x - %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vsubt.i8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %diff = sub <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %diff, <16 x i8> %x
  ret <16 x i8> %merged
}
87
; <4 x i32> multiply merged through a select on the vctp32(%n) predicate:
; true lanes take %x * %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmult.i32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %prod = mul <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %prod, <4 x i32> %x
  ret <4 x i32> %merged
}
101
; <8 x i16> multiply merged through a select on the vctp16(%n) predicate:
; true lanes take %x * %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmult.i16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %prod = mul <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %prod, <8 x i16> %x
  ret <8 x i16> %merged
}
115
; <16 x i8> multiply merged through a select on the vctp8(%n) predicate:
; true lanes take %x * %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmult.i8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %prod = mul <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %prod, <16 x i8> %x
  ret <16 x i8> %merged
}
129
; <4 x i32> bitwise AND merged through a select on the vctp32(%n) predicate:
; true lanes take %x & %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vandt writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %masked = and <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %masked, <4 x i32> %x
  ret <4 x i32> %merged
}
143
; <8 x i16> bitwise AND merged through a select on the vctp16(%n) predicate:
; true lanes take %x & %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vandt writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %masked = and <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %masked, <8 x i16> %x
  ret <8 x i16> %merged
}
157
; <16 x i8> bitwise AND merged through a select on the vctp8(%n) predicate:
; true lanes take %x & %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vandt writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %masked = and <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %masked, <16 x i8> %x
  ret <16 x i8> %merged
}
171
; <4 x i32> bitwise OR merged through a select on the vctp32(%n) predicate:
; true lanes take %x | %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vorrt writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %combined = or <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %combined, <4 x i32> %x
  ret <4 x i32> %merged
}
185
; <8 x i16> bitwise OR merged through a select on the vctp16(%n) predicate:
; true lanes take %x | %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vorrt writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %combined = or <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %combined, <8 x i16> %x
  ret <8 x i16> %merged
}
199
; <16 x i8> bitwise OR merged through a select on the vctp8(%n) predicate:
; true lanes take %x | %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vorrt writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %combined = or <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %combined, <16 x i8> %x
  ret <16 x i8> %merged
}
213
; <4 x i32> bitwise XOR merged through a select on the vctp32(%n) predicate:
; true lanes take %x ^ %y, false lanes keep %x.
; Expected codegen: one vpst-predicated veort writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %toggled = xor <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %toggled, <4 x i32> %x
  ret <4 x i32> %merged
}
227
; <8 x i16> bitwise XOR merged through a select on the vctp16(%n) predicate:
; true lanes take %x ^ %y, false lanes keep %x.
; Expected codegen: one vpst-predicated veort writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %toggled = xor <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %toggled, <8 x i16> %x
  ret <8 x i16> %merged
}
241
; <16 x i8> bitwise XOR merged through a select on the vctp8(%n) predicate:
; true lanes take %x ^ %y, false lanes keep %x.
; Expected codegen: one vpst-predicated veort writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %toggled = xor <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %toggled, <16 x i8> %x
  ret <16 x i8> %merged
}
255
; <4 x i32> per-lane left shift merged through a select on the vctp32(%n)
; predicate: true lanes take %x << %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vshlt.u32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: shl_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %shifted = shl <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %shifted, <4 x i32> %x
  ret <4 x i32> %merged
}
269
; <8 x i16> per-lane left shift merged through a select on the vctp16(%n)
; predicate: true lanes take %x << %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vshlt.u16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: shl_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %shifted = shl <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %shifted, <8 x i16> %x
  ret <8 x i16> %merged
}
283
; <16 x i8> per-lane left shift merged through a select on the vctp8(%n)
; predicate: true lanes take %x << %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vshlt.u8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: shl_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %shifted = shl <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %shifted, <16 x i8> %x
  ret <16 x i8> %merged
}
297
; <4 x i32> arithmetic right shift merged through a select on the vctp32(%n)
; predicate: true lanes take %x ashr %y, false lanes keep %x.
; Expected codegen: %y is negated with vneg.s32 first, then a single
; vpst-predicated vshlt.s32 writes q0 in place.
define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ashr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %shifted = ashr <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %shifted, <4 x i32> %x
  ret <4 x i32> %merged
}
312
; <8 x i16> arithmetic right shift merged through a select on the vctp16(%n)
; predicate: true lanes take %x ashr %y, false lanes keep %x.
; Expected codegen: %y is negated with vneg.s16 first, then a single
; vpst-predicated vshlt.s16 writes q0 in place.
define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ashr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %shifted = ashr <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %shifted, <8 x i16> %x
  ret <8 x i16> %merged
}
327
; <16 x i8> arithmetic right shift merged through a select on the vctp8(%n)
; predicate: true lanes take %x ashr %y, false lanes keep %x.
; Expected codegen: %y is negated with vneg.s8 first, then a single
; vpst-predicated vshlt.s8 writes q0 in place.
define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ashr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %shifted = ashr <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %shifted, <16 x i8> %x
  ret <16 x i8> %merged
}
342
; <4 x i32> logical right shift merged through a select on the vctp32(%n)
; predicate: true lanes take %x lshr %y, false lanes keep %x.
; Expected codegen: %y is negated with vneg.s32 first, then a single
; vpst-predicated vshlt.u32 writes q0 in place.
define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: lshr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %shifted = lshr <4 x i32> %x, %y
  %merged = select <4 x i1> %pred, <4 x i32> %shifted, <4 x i32> %x
  ret <4 x i32> %merged
}
357
; <8 x i16> logical right shift merged through a select on the vctp16(%n)
; predicate: true lanes take %x lshr %y, false lanes keep %x.
; Expected codegen: %y is negated with vneg.s16 first, then a single
; vpst-predicated vshlt.u16 writes q0 in place.
define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: lshr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %shifted = lshr <8 x i16> %x, %y
  %merged = select <8 x i1> %pred, <8 x i16> %shifted, <8 x i16> %x
  ret <8 x i16> %merged
}
372
; <16 x i8> logical right shift merged through a select on the vctp8(%n)
; predicate: true lanes take %x lshr %y, false lanes keep %x.
; Expected codegen: %y is negated with vneg.s8 first, then a single
; vpst-predicated vshlt.u8 writes q0 in place.
define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: lshr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %shifted = lshr <16 x i8> %x, %y
  %merged = select <16 x i1> %pred, <16 x i8> %shifted, <16 x i8> %x
  ret <16 x i8> %merged
}
387
; <4 x i32> and-with-complement merged through a select on the vctp32(%n)
; predicate: true lanes take %x & ~%y (the complement is written as xor with
; all-ones), false lanes keep %x.
; Expected codegen: the xor/and pair becomes one vpst-predicated vbict.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cleared = and <4 x i32> %x, %noty
  %merged = select <4 x i1> %pred, <4 x i32> %cleared, <4 x i32> %x
  ret <4 x i32> %merged
}
402
; <8 x i16> and-with-complement merged through a select on the vctp16(%n)
; predicate: true lanes take %x & ~%y (the complement is written as xor with
; all-ones), false lanes keep %x.
; Expected codegen: the xor/and pair becomes one vpst-predicated vbict.
define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %cleared = and <8 x i16> %x, %noty
  %merged = select <8 x i1> %pred, <8 x i16> %cleared, <8 x i16> %x
  ret <8 x i16> %merged
}
417
; <16 x i8> and-with-complement merged through a select on the vctp8(%n)
; predicate: true lanes take %x & ~%y (the complement is written as xor with
; all-ones), false lanes keep %x.
; Expected codegen: the xor/and pair becomes one vpst-predicated vbict.
define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %cleared = and <16 x i8> %x, %noty
  %merged = select <16 x i1> %pred, <16 x i8> %cleared, <16 x i8> %x
  ret <16 x i8> %merged
}
432
; <4 x i32> or-with-complement merged through a select on the vctp32(%n)
; predicate: true lanes take %x | ~%y (the complement is written as xor with
; all-ones), false lanes keep %x.
; Expected codegen: the xor/or pair becomes one vpst-predicated vornt.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %filled = or <4 x i32> %x, %noty
  %merged = select <4 x i1> %pred, <4 x i32> %filled, <4 x i32> %x
  ret <4 x i32> %merged
}
447
; <8 x i16> or-with-complement merged through a select on the vctp16(%n)
; predicate: true lanes take %x | ~%y (the complement is written as xor with
; all-ones), false lanes keep %x.
; Expected codegen: the xor/or pair becomes one vpst-predicated vornt.
define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %filled = or <8 x i16> %x, %noty
  %merged = select <8 x i1> %pred, <8 x i16> %filled, <8 x i16> %x
  ret <8 x i16> %merged
}
462
; <16 x i8> or-with-complement merged through a select on the vctp8(%n)
; predicate: true lanes take %x | ~%y (the complement is written as xor with
; all-ones), false lanes keep %x.
; Expected codegen: the xor/or pair becomes one vpst-predicated vornt.
define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %filled = or <16 x i8> %x, %noty
  %merged = select <16 x i1> %pred, <16 x i8> %filled, <16 x i8> %x
  ret <16 x i8> %merged
}
477
; <4 x float> fadd merged through a select on the vctp32(%n) predicate:
; true lanes take %x + %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vaddt.f32 writing q0 in place.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %sum = fadd <4 x float> %x, %y
  %merged = select <4 x i1> %pred, <4 x float> %sum, <4 x float> %x
  ret <4 x float> %merged
}
491
; <8 x half> fadd merged through a select on the vctp16(%n) predicate:
; true lanes take %x + %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vaddt.f16 writing q0 in place.
define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %sum = fadd <8 x half> %x, %y
  %merged = select <8 x i1> %pred, <8 x half> %sum, <8 x half> %x
  ret <8 x half> %merged
}
505
; <4 x float> fsub merged through a select on the vctp32(%n) predicate:
; true lanes take %x - %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vsubt.f32 writing q0 in place.
define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %diff = fsub <4 x float> %x, %y
  %merged = select <4 x i1> %pred, <4 x float> %diff, <4 x float> %x
  ret <4 x float> %merged
}
519
; <8 x half> fsub merged through a select on the vctp16(%n) predicate:
; true lanes take %x - %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vsubt.f16 writing q0 in place.
define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %diff = fsub <8 x half> %x, %y
  %merged = select <8 x i1> %pred, <8 x half> %diff, <8 x half> %x
  ret <8 x half> %merged
}
533
; <4 x float> fmul merged through a select on the vctp32(%n) predicate:
; true lanes take %x * %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmult.f32 writing q0 in place.
define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %prod = fmul <4 x float> %x, %y
  %merged = select <4 x i1> %pred, <4 x float> %prod, <4 x float> %x
  ret <4 x float> %merged
}
547
; <8 x half> fmul merged through a select on the vctp16(%n) predicate:
; true lanes take %x * %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmult.f16 writing q0 in place.
define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %prod = fmul <8 x half> %x, %y
  %merged = select <8 x i1> %pred, <8 x half> %prod, <8 x half> %x
  ret <8 x half> %merged
}
561
; <4 x float> fdiv merged through a select on the vctp32(%n) predicate:
; true lanes take %x / %y, false lanes keep %x.
; Expected codegen: the divide is done lane by lane with scalar vdiv.f32 into
; q1 (s4-s7), and the select stays a separate vpst-predicated vmovt q0, q1.
define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fdiv_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdiv.f32 s7, s3, s7
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vdiv.f32 s6, s2, s6
; CHECK-NEXT:    vdiv.f32 s5, s1, s5
; CHECK-NEXT:    vdiv.f32 s4, s0, s4
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %quot = fdiv <4 x float> %x, %y
  %merged = select <4 x i1> %pred, <4 x float> %quot, <4 x float> %x
  ret <4 x float> %merged
}
579
; <8 x half> fdiv merged through a select on the vctp16(%n) predicate:
; true lanes take %x / %y, false lanes keep %x.
; Expected codegen: the divide is done per half with scalar vdiv.f16, using
; vmovx.f16 / vins.f16 to reach and re-pack the upper halves into q1; the
; select stays a separate vpst-predicated vmovt q0, q1.
define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fdiv_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s8, s4
; CHECK-NEXT:    vmovx.f16 s10, s0
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s4, s0, s4
; CHECK-NEXT:    vins.f16 s4, s8
; CHECK-NEXT:    vmovx.f16 s8, s5
; CHECK-NEXT:    vmovx.f16 s10, s1
; CHECK-NEXT:    vdiv.f16 s5, s1, s5
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vmovx.f16 s10, s2
; CHECK-NEXT:    vins.f16 s5, s8
; CHECK-NEXT:    vmovx.f16 s8, s6
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s6, s2, s6
; CHECK-NEXT:    vins.f16 s6, s8
; CHECK-NEXT:    vmovx.f16 s8, s7
; CHECK-NEXT:    vmovx.f16 s10, s3
; CHECK-NEXT:    vdiv.f16 s7, s3, s7
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vins.f16 s7, s8
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %quot = fdiv <8 x half> %x, %y
  %merged = select <8 x i1> %pred, <8 x half> %quot, <8 x half> %x
  ret <8 x half> %merged
}
613
; <4 x float> llvm.fma intrinsic merged through a select on the vctp32(%n)
; predicate: true lanes take fma(%y, %z, %x), false lanes keep %x.
; Expected codegen: one vpst-predicated vfmat.f32 accumulating into q0.
define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fmai_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %fused = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
  %merged = select <4 x i1> %pred, <4 x float> %fused, <4 x float> %x
  ret <4 x float> %merged
}
627
; <8 x half> llvm.fma intrinsic merged through a select on the vctp16(%n)
; predicate: true lanes take fma(%y, %z, %x), false lanes keep %x.
; Expected codegen: one vpst-predicated vfmat.f16 accumulating into q0.
define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fmai_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %fused = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %merged = select <8 x i1> %pred, <8 x half> %fused, <8 x half> %x
  ret <8 x half> %merged
}
641
; <4 x float> separate fast-math fmul + fadd merged through a select on the
; vctp32(%n) predicate: true lanes take (%y * %z) + %x, false lanes keep %x.
; Expected codegen: the pair is contracted into one vpst-predicated vfmat.f32.
define arm_aapcs_vfpcc <4 x float> @fma_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fma_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %prod = fmul fast <4 x float> %y, %z
  %sum = fadd fast <4 x float> %prod, %x
  %merged = select <4 x i1> %pred, <4 x float> %sum, <4 x float> %x
  ret <4 x float> %merged
}
656
; <8 x half> separate fast-math fmul + fadd merged through a select on the
; vctp16(%n) predicate: true lanes take (%y * %z) + %x, false lanes keep %x.
; Expected codegen: the pair is contracted into one vpst-predicated vfmat.f16.
define arm_aapcs_vfpcc <8 x half> @fma_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fma_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %prod = fmul fast <8 x half> %y, %z
  %sum = fadd fast <8 x half> %prod, %x
  %merged = select <8 x i1> %pred, <8 x half> %sum, <8 x half> %x
  ret <8 x half> %merged
}
671
; <4 x i32> signed minimum (icmp slt + select) merged through a second select
; on the vctp32(%n) predicate: true lanes take the smaller of %x and %y,
; false lanes keep %x.
; Expected codegen: one vpst-predicated vmint.s32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %lt = icmp slt <4 x i32> %x, %y
  %min = select <4 x i1> %lt, <4 x i32> %x, <4 x i32> %y
  %merged = select <4 x i1> %pred, <4 x i32> %min, <4 x i32> %x
  ret <4 x i32> %merged
}
686
; <8 x i16> signed minimum (icmp slt + select) merged through a second select
; on the vctp16(%n) predicate: true lanes take the smaller of %x and %y,
; false lanes keep %x.
; Expected codegen: one vpst-predicated vmint.s16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %lt = icmp slt <8 x i16> %x, %y
  %min = select <8 x i1> %lt, <8 x i16> %x, <8 x i16> %y
  %merged = select <8 x i1> %pred, <8 x i16> %min, <8 x i16> %x
  ret <8 x i16> %merged
}
701
; <16 x i8> signed minimum (icmp slt + select) merged through a second select
; on the vctp8(%n) predicate: true lanes take the smaller of %x and %y,
; false lanes keep %x.
; Expected codegen: one vpst-predicated vmint.s8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %lt = icmp slt <16 x i8> %x, %y
  %min = select <16 x i1> %lt, <16 x i8> %x, <16 x i8> %y
  %merged = select <16 x i1> %pred, <16 x i8> %min, <16 x i8> %x
  ret <16 x i8> %merged
}
716
; <4 x i32> signed maximum (icmp sgt + select) merged through a second select
; on the vctp32(%n) predicate: true lanes take the larger of %x and %y,
; false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxt.s32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %gt = icmp sgt <4 x i32> %x, %y
  %max = select <4 x i1> %gt, <4 x i32> %x, <4 x i32> %y
  %merged = select <4 x i1> %pred, <4 x i32> %max, <4 x i32> %x
  ret <4 x i32> %merged
}
731
; <8 x i16> signed maximum (icmp sgt + select) merged through a second select
; on the vctp16(%n) predicate: true lanes take the larger of %x and %y,
; false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxt.s16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %gt = icmp sgt <8 x i16> %x, %y
  %max = select <8 x i1> %gt, <8 x i16> %x, <8 x i16> %y
  %merged = select <8 x i1> %pred, <8 x i16> %max, <8 x i16> %x
  ret <8 x i16> %merged
}
746
; <16 x i8> signed maximum (icmp sgt + select) merged through a second select
; on the vctp8(%n) predicate: true lanes take the larger of %x and %y,
; false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxt.s8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %gt = icmp sgt <16 x i8> %x, %y
  %max = select <16 x i1> %gt, <16 x i8> %x, <16 x i8> %y
  %merged = select <16 x i1> %pred, <16 x i8> %max, <16 x i8> %x
  ret <16 x i8> %merged
}
761
; <4 x i32> unsigned minimum (icmp ult + select) merged through a second
; select on the vctp32(%n) predicate: true lanes take the smaller of %x and
; %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmint.u32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %lt = icmp ult <4 x i32> %x, %y
  %min = select <4 x i1> %lt, <4 x i32> %x, <4 x i32> %y
  %merged = select <4 x i1> %pred, <4 x i32> %min, <4 x i32> %x
  ret <4 x i32> %merged
}
776
; <8 x i16> unsigned minimum (icmp ult + select) merged through a second
; select on the vctp16(%n) predicate: true lanes take the smaller of %x and
; %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmint.u16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %lt = icmp ult <8 x i16> %x, %y
  %min = select <8 x i1> %lt, <8 x i16> %x, <8 x i16> %y
  %merged = select <8 x i1> %pred, <8 x i16> %min, <8 x i16> %x
  ret <8 x i16> %merged
}
791
; <16 x i8> unsigned minimum (icmp ult + select) merged through a second
; select on the vctp8(%n) predicate: true lanes take the smaller of %x and
; %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmint.u8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %lt = icmp ult <16 x i8> %x, %y
  %min = select <16 x i1> %lt, <16 x i8> %x, <16 x i8> %y
  %merged = select <16 x i1> %pred, <16 x i8> %min, <16 x i8> %x
  ret <16 x i8> %merged
}
806
; <4 x i32> unsigned maximum (icmp ugt + select) merged through a second
; select on the vctp32(%n) predicate: true lanes take the larger of %x and
; %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxt.u32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %gt = icmp ugt <4 x i32> %x, %y
  %max = select <4 x i1> %gt, <4 x i32> %x, <4 x i32> %y
  %merged = select <4 x i1> %pred, <4 x i32> %max, <4 x i32> %x
  ret <4 x i32> %merged
}
821
; <8 x i16> unsigned maximum (icmp ugt + select) merged through a second
; select on the vctp16(%n) predicate: true lanes take the larger of %x and
; %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxt.u16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %gt = icmp ugt <8 x i16> %x, %y
  %max = select <8 x i1> %gt, <8 x i16> %x, <8 x i16> %y
  %merged = select <8 x i1> %pred, <8 x i16> %max, <8 x i16> %x
  ret <8 x i16> %merged
}
836
; <16 x i8> unsigned maximum (icmp ugt + select) merged through a second
; select on the vctp8(%n) predicate: true lanes take the larger of %x and
; %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxt.u8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %gt = icmp ugt <16 x i8> %x, %y
  %max = select <16 x i1> %gt, <16 x i8> %x, <16 x i8> %y
  %merged = select <16 x i1> %pred, <16 x i8> %max, <16 x i8> %x
  ret <16 x i8> %merged
}
851
; <4 x float> minimum written as fast-math fcmp olt + select, merged through a
; second select on the vctp32(%n) predicate: true lanes take the compare's
; pick of %x or %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vminnmt.f32 writing q0 in place.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %lt = fcmp fast olt <4 x float> %x, %y
  %min = select <4 x i1> %lt, <4 x float> %x, <4 x float> %y
  %merged = select <4 x i1> %pred, <4 x float> %min, <4 x float> %x
  ret <4 x float> %merged
}
866
; <8 x half> minimum written as fast-math fcmp olt + select, merged through a
; second select on the vctp16(%n) predicate: true lanes take the compare's
; pick of %x or %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vminnmt.f16 writing q0 in place.
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %lt = fcmp fast olt <8 x half> %x, %y
  %min = select <8 x i1> %lt, <8 x half> %x, <8 x half> %y
  %merged = select <8 x i1> %pred, <8 x half> %min, <8 x half> %x
  ret <8 x half> %merged
}
881
; <4 x float> maximum written as fast-math fcmp ogt + select, merged through a
; second select on the vctp32(%n) predicate: true lanes take the compare's
; pick of %x or %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxnmt.f32 writing q0 in place.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %gt = fcmp fast ogt <4 x float> %x, %y
  %max = select <4 x i1> %gt, <4 x float> %x, <4 x float> %y
  %merged = select <4 x i1> %pred, <4 x float> %max, <4 x float> %x
  ret <4 x float> %merged
}
896
; <8 x half> maximum written as fast-math fcmp ogt + select, merged through a
; second select on the vctp16(%n) predicate: true lanes take the compare's
; pick of %x or %y, false lanes keep %x.
; Expected codegen: one vpst-predicated vmaxnmt.f16 writing q0 in place.
define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %gt = fcmp fast ogt <8 x half> %x, %y
  %max = select <8 x i1> %gt, <8 x half> %x, <8 x half> %y
  %merged = select <8 x i1> %pred, <8 x half> %max, <8 x half> %x
  ret <8 x half> %merged
}
911
; <4 x i32> llvm.sadd.sat merged through a select on the vctp32(%n) predicate:
; true lanes take sadd.sat(%x, %y), false lanes keep %x.
; Expected codegen: one vpst-predicated vqaddt.s32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %satsum = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %merged = select <4 x i1> %pred, <4 x i32> %satsum, <4 x i32> %x
  ret <4 x i32> %merged
}
925
; Signed saturating add on <8 x i16>, selected under the vctp16(%n) mask with
; %x as the fallback. Expected: one predicated vqaddt.s16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Signed saturating add on <16 x i8>, selected under the vctp8(%n) mask with
; %x as the fallback. Expected: one predicated vqaddt.s8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned saturating add on <4 x i32>, selected under the vctp32(%n) mask with
; %x as the fallback. Expected: one predicated vqaddt.u32 writing q0 in place.
define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Unsigned saturating add on <8 x i16>, selected under the vctp16(%n) mask with
; %x as the fallback. Expected: one predicated vqaddt.u16 writing q0 in place.
define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Unsigned saturating add on <16 x i8>, selected under the vctp8(%n) mask with
; %x as the fallback. Expected: one predicated vqaddt.u8 writing q0 in place.
define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Signed saturating subtract on <4 x i32>, selected under the vctp32(%n) mask
; with %x as the fallback. Expected: one predicated vqsubt.s32 writing q0.
define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Signed saturating subtract on <8 x i16>, selected under the vctp16(%n) mask
; with %x as the fallback. Expected: one predicated vqsubt.s16 writing q0.
define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Signed saturating subtract on <16 x i8>, selected under the vctp8(%n) mask
; with %x as the fallback. Expected: one predicated vqsubt.s8 writing q0.
define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned saturating subtract on <4 x i32>, selected under the vctp32(%n) mask
; with %x as the fallback. Expected: one predicated vqsubt.u32 writing q0.
define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Unsigned saturating subtract on <8 x i16>, selected under the vctp16(%n) mask
; with %x as the fallback. Expected: one predicated vqsubt.u16 writing q0.
define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Unsigned saturating subtract on <16 x i8>, selected under the vctp8(%n) mask
; with %x as the fallback. Expected: one predicated vqsubt.u8 writing q0.
define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Add of a splatted scalar %y to <4 x i32> %x, selected under vctp32(%n) with
; %x as the fallback. Expected: predicated vaddt.i32 taking the scalar in r0.
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: addqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = add <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Add of a splatted scalar %y to <8 x i16> %x, selected under vctp16(%n) with
; %x as the fallback. Expected: predicated vaddt.i16 taking the scalar in r0.
define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: addqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = add <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Add of a splatted scalar %y to <16 x i8> %x, selected under vctp8(%n) with
; %x as the fallback. Expected: predicated vaddt.i8 taking the scalar in r0.
define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: addqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = add <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Subtract of a splatted scalar %y from <4 x i32> %x, selected under vctp32(%n)
; with %x as the fallback. Expected: predicated vsubt.i32 with the scalar in r0.
define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: subqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Subtract of a splatted scalar %y from <8 x i16> %x, selected under vctp16(%n)
; with %x as the fallback. Expected: predicated vsubt.i16 with the scalar in r0.
define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: subqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Subtract of a splatted scalar %y from <16 x i8> %x, selected under vctp8(%n)
; with %x as the fallback. Expected: predicated vsubt.i8 with the scalar in r0.
define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: subqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = sub <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Multiply of <4 x i32> %x by a splatted scalar %y, selected under vctp32(%n)
; with %x as the fallback. Expected: predicated vmult.i32 with the scalar in r0.
define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: mulqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = mul <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Multiply of <8 x i16> %x by a splatted scalar %y, selected under vctp16(%n)
; with %x as the fallback. Expected: predicated vmult.i16 with the scalar in r0.
define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: mulqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = mul <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Multiply of <16 x i8> %x by a splatted scalar %y, selected under vctp8(%n)
; with %x as the fallback. Expected: predicated vmult.i8 with the scalar in r0.
define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: mulqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = mul <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; fadd of a splatted float %y to %x, selected under vctp32(%n) with %x as the
; fallback. Expected: s4 is moved to r1, then a predicated vaddt.f32 q0, q0, r1.
define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: faddqr_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fadd <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; fadd of a splatted half %y to %x, selected under vctp16(%n) with %x as the
; fallback. Expected: vmov.f16 r1, s4, then a predicated vaddt.f16 q0, q0, r1.
define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: faddqr_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r1, s4
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fadd <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; fsub of a splatted float %y from %x, selected under vctp32(%n) with %x as the
; fallback. Expected: s4 is moved to r1, then a predicated vsubt.f32 q0, q0, r1.
define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fsubqr_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fsub <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; fsub of a splatted half %y from %x, selected under vctp16(%n) with %x as the
; fallback. Expected: vmov.f16 r1, s4, then a predicated vsubt.f16 q0, q0, r1.
define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fsubqr_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r1, s4
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fsub <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; fmul of %x by a splatted float %y, selected under vctp32(%n) with %x as the
; fallback. Expected: s4 is moved to r1, then a predicated vmult.f32 q0, q0, r1.
define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fmulqr_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fmul <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

; fmul of %x by a splatted half %y, selected under vctp16(%n) with %x as the
; fallback. Expected: vmov.f16 r1, s4, then a predicated vmult.f16 q0, q0, r1.
define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fmulqr_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r1, s4
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, r1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fmul <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Signed saturating add of a splatted scalar %y to <4 x i32> %x, selected under
; vctp32(%n) with %x as the fallback. Expected: predicated vqaddt.s32 q0, q0, r0.
define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Signed saturating add of a splatted scalar %y to <8 x i16> %x, selected under
; vctp16(%n) with %x as the fallback. Expected: predicated vqaddt.s16 q0, q0, r0.
define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Signed saturating add of a splatted scalar %y to <16 x i8> %x, selected under
; vctp8(%n) with %x as the fallback. Expected: predicated vqaddt.s8 q0, q0, r0.
define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned saturating add of a splatted scalar %y to <4 x i32> %x, selected under
; vctp32(%n) with %x as the fallback. Expected: predicated vqaddt.u32 q0, q0, r0.
define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: uadd_satqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Unsigned saturating add of a splatted scalar %y to <8 x i16> %x, selected under
; vctp16(%n) with %x as the fallback. Expected: predicated vqaddt.u16 q0, q0, r0.
define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: uadd_satqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Unsigned saturating add of a splatted scalar %y to <16 x i8> %x, selected under
; vctp8(%n) with %x as the fallback. Expected: predicated vqaddt.u8 q0, q0, r0.
define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: uadd_satqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Signed saturating subtract of a splatted scalar %y from <4 x i32> %x, selected
; under vctp32(%n) with %x as the fallback. Expected: vqsubt.s32 q0, q0, r0.
define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: ssub_satqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Signed saturating subtract of a splatted scalar %y from <8 x i16> %x, selected
; under vctp16(%n) with %x as the fallback. Expected: vqsubt.s16 q0, q0, r0.
define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: ssub_satqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Signed saturating subtract of a splatted scalar %y from <16 x i8> %x, selected
; under vctp8(%n) with %x as the fallback. Expected: vqsubt.s8 q0, q0, r0.
define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: ssub_satqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned saturating subtract of a splatted scalar %y from <4 x i32> %x, selected
; under vctp32(%n) with %x as the fallback. Expected: vqsubt.u32 q0, q0, r0.
define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

; Unsigned saturating subtract of a splatted scalar %y from <8 x i16> %x, selected
; under vctp16(%n) with %x as the fallback. Expected: vqsubt.u16 q0, q0, r0.
define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

; Unsigned saturating subtract of a splatted scalar %y from <16 x i8> %x, selected
; under vctp8(%n) with %x as the fallback. Expected: vqsubt.u8 q0, q0, r0.
define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u8 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Add on <4 x i32> selected under vctp32(%n) with %y as the fallback. Expected:
; the predicated vaddt.i32 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = add <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; Add on <8 x i16> selected under vctp16(%n) with %y as the fallback. Expected:
; the predicated vaddt.i16 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = add <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; Add on <16 x i8> selected under vctp8(%n) with %y as the fallback. Expected:
; the predicated vaddt.i8 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = add <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Subtract on <4 x i32> selected under vctp32(%n) with %y as the fallback.
; Expected: the predicated vsubt.i32 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; Subtract on <8 x i16> selected under vctp16(%n) with %y as the fallback.
; Expected: the predicated vsubt.i16 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; Subtract on <16 x i8> selected under vctp8(%n) with %y as the fallback.
; Expected: the predicated vsubt.i8 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Multiply on <4 x i32> selected under vctp32(%n) with %y as the fallback.
; Expected: the predicated vmult.i32 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = mul <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; Multiply on <8 x i16> selected under vctp16(%n) with %y as the fallback.
; Expected: the predicated vmult.i16 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = mul <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; Multiply on <16 x i8> selected under vctp8(%n) with %y as the fallback.
; Expected: the predicated vmult.i8 writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = mul <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Bitwise AND on <4 x i32> selected under vctp32(%n) with %y as the fallback.
; Expected: the predicated vandt writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = and <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; Bitwise AND on <8 x i16> selected under vctp16(%n) with %y as the fallback.
; Expected: the predicated vandt writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = and <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; Bitwise AND on <16 x i8> selected under vctp8(%n) with %y as the fallback.
; Expected: the predicated vandt writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = and <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Bitwise OR on <4 x i32> selected under vctp32(%n) with %y as the fallback.
; Expected: the predicated vorrt writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = or <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; Bitwise OR on <8 x i16> selected under vctp16(%n) with %y as the fallback.
; Expected: the predicated vorrt writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = or <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; Bitwise OR on <16 x i8> selected under vctp8(%n) with %y as the fallback.
; Expected: the predicated vorrt writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = or <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Bitwise XOR on <4 x i32> selected under vctp32(%n) with %y as the fallback.
; Expected: the predicated veort writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = xor <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

; Bitwise XOR on <8 x i16> selected under vctp16(%n) with %y as the fallback.
; Expected: the predicated veort writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = xor <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

; Bitwise XOR on <16 x i8> selected under vctp8(%n) with %y as the fallback.
; Expected: the predicated veort writes q1, which is then copied to q0.
define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = xor <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Predicated left shift, false lanes from %y: lanes enabled by vctp32(%n) take (x << y), the rest keep %y.
; Expected codegen: one vshlt.u32 under vpst writing q1, then a vmov of q1 into q0.
1788define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1789; CHECK-LABEL: shl_v4i32_y:
1790; CHECK:       @ %bb.0: @ %entry
1791; CHECK-NEXT:    vctp.32 r0
1792; CHECK-NEXT:    vpst
1793; CHECK-NEXT:    vshlt.u32 q1, q0, q1
1794; CHECK-NEXT:    vmov q0, q1
1795; CHECK-NEXT:    bx lr
1796entry:
1797  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1798  %a = shl <4 x i32> %x, %y
1799  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1800  ret <4 x i32> %b
1801}
1802
; Predicated left shift, false lanes from %y: lanes enabled by vctp16(%n) take (x << y), the rest keep %y.
; Expected codegen: one vshlt.u16 under vpst writing q1, then a vmov of q1 into q0.
1803define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1804; CHECK-LABEL: shl_v8i16_y:
1805; CHECK:       @ %bb.0: @ %entry
1806; CHECK-NEXT:    vctp.16 r0
1807; CHECK-NEXT:    vpst
1808; CHECK-NEXT:    vshlt.u16 q1, q0, q1
1809; CHECK-NEXT:    vmov q0, q1
1810; CHECK-NEXT:    bx lr
1811entry:
1812  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1813  %a = shl <8 x i16> %x, %y
1814  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1815  ret <8 x i16> %b
1816}
1817
; Predicated left shift, false lanes from %y: lanes enabled by vctp8(%n) take (x << y), the rest keep %y.
; Expected codegen: one vshlt.u8 under vpst writing q1, then a vmov of q1 into q0.
1818define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1819; CHECK-LABEL: shl_v16i8_y:
1820; CHECK:       @ %bb.0: @ %entry
1821; CHECK-NEXT:    vctp.8 r0
1822; CHECK-NEXT:    vpst
1823; CHECK-NEXT:    vshlt.u8 q1, q0, q1
1824; CHECK-NEXT:    vmov q0, q1
1825; CHECK-NEXT:    bx lr
1826entry:
1827  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1828  %a = shl <16 x i8> %x, %y
1829  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1830  ret <16 x i8> %b
1831}
1832
; Predicated arithmetic right shift, false lanes from %y (select on vctp32(%n)).
; Expected codegen: the shift amount is negated into q2 (vneg.s32), then a signed vshlt.s32
; by the negated amount runs under vpst into q1, followed by a vmov of q1 into q0.
1833define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1834; CHECK-LABEL: ashr_v4i32_y:
1835; CHECK:       @ %bb.0: @ %entry
1836; CHECK-NEXT:    vneg.s32 q2, q1
1837; CHECK-NEXT:    vctp.32 r0
1838; CHECK-NEXT:    vpst
1839; CHECK-NEXT:    vshlt.s32 q1, q0, q2
1840; CHECK-NEXT:    vmov q0, q1
1841; CHECK-NEXT:    bx lr
1842entry:
1843  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1844  %a = ashr <4 x i32> %x, %y
1845  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1846  ret <4 x i32> %b
1847}
1848
; Predicated arithmetic right shift, false lanes from %y (select on vctp16(%n)).
; Expected codegen: the shift amount is negated into q2 (vneg.s16), then a signed vshlt.s16
; by the negated amount runs under vpst into q1, followed by a vmov of q1 into q0.
1849define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1850; CHECK-LABEL: ashr_v8i16_y:
1851; CHECK:       @ %bb.0: @ %entry
1852; CHECK-NEXT:    vneg.s16 q2, q1
1853; CHECK-NEXT:    vctp.16 r0
1854; CHECK-NEXT:    vpst
1855; CHECK-NEXT:    vshlt.s16 q1, q0, q2
1856; CHECK-NEXT:    vmov q0, q1
1857; CHECK-NEXT:    bx lr
1858entry:
1859  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1860  %a = ashr <8 x i16> %x, %y
1861  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1862  ret <8 x i16> %b
1863}
1864
; Predicated arithmetic right shift, false lanes from %y (select on vctp8(%n)).
; Expected codegen: the shift amount is negated into q2 (vneg.s8), then a signed vshlt.s8
; by the negated amount runs under vpst into q1, followed by a vmov of q1 into q0.
1865define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1866; CHECK-LABEL: ashr_v16i8_y:
1867; CHECK:       @ %bb.0: @ %entry
1868; CHECK-NEXT:    vneg.s8 q2, q1
1869; CHECK-NEXT:    vctp.8 r0
1870; CHECK-NEXT:    vpst
1871; CHECK-NEXT:    vshlt.s8 q1, q0, q2
1872; CHECK-NEXT:    vmov q0, q1
1873; CHECK-NEXT:    bx lr
1874entry:
1875  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1876  %a = ashr <16 x i8> %x, %y
1877  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1878  ret <16 x i8> %b
1879}
1880
; Predicated logical right shift, false lanes from %y (select on vctp32(%n)).
; Expected codegen: the shift amount is negated into q2 (vneg.s32), then an unsigned vshlt.u32
; by the negated amount runs under vpst into q1, followed by a vmov of q1 into q0.
1881define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1882; CHECK-LABEL: lshr_v4i32_y:
1883; CHECK:       @ %bb.0: @ %entry
1884; CHECK-NEXT:    vneg.s32 q2, q1
1885; CHECK-NEXT:    vctp.32 r0
1886; CHECK-NEXT:    vpst
1887; CHECK-NEXT:    vshlt.u32 q1, q0, q2
1888; CHECK-NEXT:    vmov q0, q1
1889; CHECK-NEXT:    bx lr
1890entry:
1891  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1892  %a = lshr <4 x i32> %x, %y
1893  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1894  ret <4 x i32> %b
1895}
1896
; Predicated logical right shift, false lanes from %y (select on vctp16(%n)).
; Expected codegen: the shift amount is negated into q2 (vneg.s16), then an unsigned vshlt.u16
; by the negated amount runs under vpst into q1, followed by a vmov of q1 into q0.
1897define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1898; CHECK-LABEL: lshr_v8i16_y:
1899; CHECK:       @ %bb.0: @ %entry
1900; CHECK-NEXT:    vneg.s16 q2, q1
1901; CHECK-NEXT:    vctp.16 r0
1902; CHECK-NEXT:    vpst
1903; CHECK-NEXT:    vshlt.u16 q1, q0, q2
1904; CHECK-NEXT:    vmov q0, q1
1905; CHECK-NEXT:    bx lr
1906entry:
1907  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1908  %a = lshr <8 x i16> %x, %y
1909  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1910  ret <8 x i16> %b
1911}
1912
; Predicated logical right shift, false lanes from %y (select on vctp8(%n)).
; Expected codegen: the shift amount is negated into q2 (vneg.s8), then an unsigned vshlt.u8
; by the negated amount runs under vpst into q1, followed by a vmov of q1 into q0.
1913define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1914; CHECK-LABEL: lshr_v16i8_y:
1915; CHECK:       @ %bb.0: @ %entry
1916; CHECK-NEXT:    vneg.s8 q2, q1
1917; CHECK-NEXT:    vctp.8 r0
1918; CHECK-NEXT:    vpst
1919; CHECK-NEXT:    vshlt.u8 q1, q0, q2
1920; CHECK-NEXT:    vmov q0, q1
1921; CHECK-NEXT:    bx lr
1922entry:
1923  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1924  %a = lshr <16 x i8> %x, %y
1925  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1926  ret <16 x i8> %b
1927}
1928
; Predicated and-not, false lanes from %y: lanes enabled by vctp32(%n) take (x & ~y), the rest keep %y.
; The xor with all-ones plus the and are expected to become one vbict under vpst into q1, then vmov to q0.
1929define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1930; CHECK-LABEL: andnot_v4i32_y:
1931; CHECK:       @ %bb.0: @ %entry
1932; CHECK-NEXT:    vctp.32 r0
1933; CHECK-NEXT:    vpst
1934; CHECK-NEXT:    vbict q1, q0, q1
1935; CHECK-NEXT:    vmov q0, q1
1936; CHECK-NEXT:    bx lr
1937entry:
1938  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1939  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1940  %a = and <4 x i32> %x, %y1
1941  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1942  ret <4 x i32> %b
1943}
1944
; Predicated and-not, false lanes from %y: lanes enabled by vctp16(%n) take (x & ~y), the rest keep %y.
; The xor with all-ones plus the and are expected to become one vbict under vpst into q1, then vmov to q0.
1945define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1946; CHECK-LABEL: andnot_v8i16_y:
1947; CHECK:       @ %bb.0: @ %entry
1948; CHECK-NEXT:    vctp.16 r0
1949; CHECK-NEXT:    vpst
1950; CHECK-NEXT:    vbict q1, q0, q1
1951; CHECK-NEXT:    vmov q0, q1
1952; CHECK-NEXT:    bx lr
1953entry:
1954  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1955  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1956  %a = and <8 x i16> %x, %y1
1957  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1958  ret <8 x i16> %b
1959}
1960
; Predicated and-not, false lanes from %y: lanes enabled by vctp8(%n) take (x & ~y), the rest keep %y.
; The xor with all-ones plus the and are expected to become one vbict under vpst into q1, then vmov to q0.
1961define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1962; CHECK-LABEL: andnot_v16i8_y:
1963; CHECK:       @ %bb.0: @ %entry
1964; CHECK-NEXT:    vctp.8 r0
1965; CHECK-NEXT:    vpst
1966; CHECK-NEXT:    vbict q1, q0, q1
1967; CHECK-NEXT:    vmov q0, q1
1968; CHECK-NEXT:    bx lr
1969entry:
1970  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1971  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1972  %a = and <16 x i8> %x, %y1
1973  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1974  ret <16 x i8> %b
1975}
1976
; Predicated or-not, false lanes from %y: lanes enabled by vctp32(%n) take (x | ~y), the rest keep %y.
; The xor with all-ones plus the or are expected to become one vornt under vpst into q1, then vmov to q0.
1977define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1978; CHECK-LABEL: ornot_v4i32_y:
1979; CHECK:       @ %bb.0: @ %entry
1980; CHECK-NEXT:    vctp.32 r0
1981; CHECK-NEXT:    vpst
1982; CHECK-NEXT:    vornt q1, q0, q1
1983; CHECK-NEXT:    vmov q0, q1
1984; CHECK-NEXT:    bx lr
1985entry:
1986  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1987  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1988  %a = or <4 x i32> %x, %y1
1989  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1990  ret <4 x i32> %b
1991}
1992
; Predicated or-not, false lanes from %y: lanes enabled by vctp16(%n) take (x | ~y), the rest keep %y.
; The xor with all-ones plus the or are expected to become one vornt under vpst into q1, then vmov to q0.
1993define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1994; CHECK-LABEL: ornot_v8i16_y:
1995; CHECK:       @ %bb.0: @ %entry
1996; CHECK-NEXT:    vctp.16 r0
1997; CHECK-NEXT:    vpst
1998; CHECK-NEXT:    vornt q1, q0, q1
1999; CHECK-NEXT:    vmov q0, q1
2000; CHECK-NEXT:    bx lr
2001entry:
2002  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2003  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
2004  %a = or <8 x i16> %x, %y1
2005  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2006  ret <8 x i16> %b
2007}
2008
; Predicated or-not, false lanes from %y: lanes enabled by vctp8(%n) take (x | ~y), the rest keep %y.
; The xor with all-ones plus the or are expected to become one vornt under vpst into q1, then vmov to q0.
2009define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2010; CHECK-LABEL: ornot_v16i8_y:
2011; CHECK:       @ %bb.0: @ %entry
2012; CHECK-NEXT:    vctp.8 r0
2013; CHECK-NEXT:    vpst
2014; CHECK-NEXT:    vornt q1, q0, q1
2015; CHECK-NEXT:    vmov q0, q1
2016; CHECK-NEXT:    bx lr
2017entry:
2018  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2019  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
2020  %a = or <16 x i8> %x, %y1
2021  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2022  ret <16 x i8> %b
2023}
2024
; Predicated float add, false lanes from %y: lanes enabled by vctp32(%n) take (x + y), the rest keep %y.
; Expected codegen: one vaddt.f32 under vpst writing q1, then a vmov of q1 into q0.
2025define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2026; CHECK-LABEL: fadd_v4f32_y:
2027; CHECK:       @ %bb.0: @ %entry
2028; CHECK-NEXT:    vctp.32 r0
2029; CHECK-NEXT:    vpst
2030; CHECK-NEXT:    vaddt.f32 q1, q0, q1
2031; CHECK-NEXT:    vmov q0, q1
2032; CHECK-NEXT:    bx lr
2033entry:
2034  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2035  %a = fadd <4 x float> %x, %y
2036  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2037  ret <4 x float> %b
2038}
2039
; Predicated half add, false lanes from %y: lanes enabled by vctp16(%n) take (x + y), the rest keep %y.
; Expected codegen: one vaddt.f16 under vpst writing q1, then a vmov of q1 into q0.
2040define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2041; CHECK-LABEL: fadd_v8f16_y:
2042; CHECK:       @ %bb.0: @ %entry
2043; CHECK-NEXT:    vctp.16 r0
2044; CHECK-NEXT:    vpst
2045; CHECK-NEXT:    vaddt.f16 q1, q0, q1
2046; CHECK-NEXT:    vmov q0, q1
2047; CHECK-NEXT:    bx lr
2048entry:
2049  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2050  %a = fadd <8 x half> %x, %y
2051  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2052  ret <8 x half> %b
2053}
2054
; Predicated float subtract, false lanes from %y: lanes enabled by vctp32(%n) take (x - y), the rest keep %y.
; Expected codegen: one vsubt.f32 under vpst writing q1, then a vmov of q1 into q0.
2055define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2056; CHECK-LABEL: fsub_v4f32_y:
2057; CHECK:       @ %bb.0: @ %entry
2058; CHECK-NEXT:    vctp.32 r0
2059; CHECK-NEXT:    vpst
2060; CHECK-NEXT:    vsubt.f32 q1, q0, q1
2061; CHECK-NEXT:    vmov q0, q1
2062; CHECK-NEXT:    bx lr
2063entry:
2064  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2065  %a = fsub <4 x float> %x, %y
2066  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2067  ret <4 x float> %b
2068}
2069
; Predicated half subtract, false lanes from %y: lanes enabled by vctp16(%n) take (x - y), the rest keep %y.
; Expected codegen: one vsubt.f16 under vpst writing q1, then a vmov of q1 into q0.
2070define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2071; CHECK-LABEL: fsub_v8f16_y:
2072; CHECK:       @ %bb.0: @ %entry
2073; CHECK-NEXT:    vctp.16 r0
2074; CHECK-NEXT:    vpst
2075; CHECK-NEXT:    vsubt.f16 q1, q0, q1
2076; CHECK-NEXT:    vmov q0, q1
2077; CHECK-NEXT:    bx lr
2078entry:
2079  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2080  %a = fsub <8 x half> %x, %y
2081  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2082  ret <8 x half> %b
2083}
2084
; Predicated float multiply, false lanes from %y: lanes enabled by vctp32(%n) take (x * y), the rest keep %y.
; Expected codegen: one vmult.f32 under vpst writing q1, then a vmov of q1 into q0.
2085define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2086; CHECK-LABEL: fmul_v4f32_y:
2087; CHECK:       @ %bb.0: @ %entry
2088; CHECK-NEXT:    vctp.32 r0
2089; CHECK-NEXT:    vpst
2090; CHECK-NEXT:    vmult.f32 q1, q0, q1
2091; CHECK-NEXT:    vmov q0, q1
2092; CHECK-NEXT:    bx lr
2093entry:
2094  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2095  %a = fmul <4 x float> %x, %y
2096  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2097  ret <4 x float> %b
2098}
2099
; Predicated half multiply, false lanes from %y: lanes enabled by vctp16(%n) take (x * y), the rest keep %y.
; Expected codegen: one vmult.f16 under vpst writing q1, then a vmov of q1 into q0.
2100define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2101; CHECK-LABEL: fmul_v8f16_y:
2102; CHECK:       @ %bb.0: @ %entry
2103; CHECK-NEXT:    vctp.16 r0
2104; CHECK-NEXT:    vpst
2105; CHECK-NEXT:    vmult.f16 q1, q0, q1
2106; CHECK-NEXT:    vmov q0, q1
2107; CHECK-NEXT:    bx lr
2108entry:
2109  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2110  %a = fmul <8 x half> %x, %y
2111  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2112  ret <8 x half> %b
2113}
2114
; Float divide selected against %y on vctp32(%n). The expected output has no vector divide:
; the fdiv appears as four scalar vdiv.f32 (one per lane, s0-s3 by s4-s7), and the select is
; done afterwards by a predicated vmovt q1, q0 under vpst, followed by a vmov of q1 into q0.
2115define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2116; CHECK-LABEL: fdiv_v4f32_y:
2117; CHECK:       @ %bb.0: @ %entry
2118; CHECK-NEXT:    vdiv.f32 s3, s3, s7
2119; CHECK-NEXT:    vctp.32 r0
2120; CHECK-NEXT:    vdiv.f32 s2, s2, s6
2121; CHECK-NEXT:    vdiv.f32 s1, s1, s5
2122; CHECK-NEXT:    vdiv.f32 s0, s0, s4
2123; CHECK-NEXT:    vpst
2124; CHECK-NEXT:    vmovt q1, q0
2125; CHECK-NEXT:    vmov q0, q1
2126; CHECK-NEXT:    bx lr
2127entry:
2128  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2129  %a = fdiv <4 x float> %x, %y
2130  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2131  ret <4 x float> %b
2132}
2133
; Half divide selected against %y on vctp16(%n). The expected output has no vector divide:
; for each 32-bit register the upper halves are extracted with vmovx.f16, both halves are
; divided with scalar vdiv.f16, and the results are recombined with vins.f16. The select is
; done afterwards by a predicated vmovt q1, q0 under vpst, followed by a vmov of q1 into q0.
2134define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2135; CHECK-LABEL: fdiv_v8f16_y:
2136; CHECK:       @ %bb.0: @ %entry
2137; CHECK-NEXT:    vmovx.f16 s10, s0
2138; CHECK-NEXT:    vmovx.f16 s8, s4
2139; CHECK-NEXT:    vdiv.f16 s8, s10, s8
2140; CHECK-NEXT:    vdiv.f16 s0, s0, s4
2141; CHECK-NEXT:    vins.f16 s0, s8
2142; CHECK-NEXT:    vmovx.f16 s10, s1
2143; CHECK-NEXT:    vmovx.f16 s8, s5
2144; CHECK-NEXT:    vdiv.f16 s1, s1, s5
2145; CHECK-NEXT:    vdiv.f16 s8, s10, s8
2146; CHECK-NEXT:    vmovx.f16 s10, s2
2147; CHECK-NEXT:    vins.f16 s1, s8
2148; CHECK-NEXT:    vmovx.f16 s8, s6
2149; CHECK-NEXT:    vdiv.f16 s8, s10, s8
2150; CHECK-NEXT:    vdiv.f16 s2, s2, s6
2151; CHECK-NEXT:    vins.f16 s2, s8
2152; CHECK-NEXT:    vmovx.f16 s10, s3
2153; CHECK-NEXT:    vmovx.f16 s8, s7
2154; CHECK-NEXT:    vdiv.f16 s3, s3, s7
2155; CHECK-NEXT:    vdiv.f16 s8, s10, s8
2156; CHECK-NEXT:    vctp.16 r0
2157; CHECK-NEXT:    vins.f16 s3, s8
2158; CHECK-NEXT:    vpst
2159; CHECK-NEXT:    vmovt q1, q0
2160; CHECK-NEXT:    vmov q0, q1
2161; CHECK-NEXT:    bx lr
2162entry:
2163  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2164  %a = fdiv <8 x half> %x, %y
2165  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2166  ret <8 x half> %b
2167}
2168
; llvm.fma intrinsic form: a = fma(y, z, x), selected against %y on vctp32(%n).
; In the expected output the select is not folded into the multiply-add: an unpredicated
; vfma.f32 q0, q1, q2 is followed by a predicated vmovt q1, q0 and a vmov of q1 into q0.
2169define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
2170; CHECK-LABEL: fmai_v4f32_y:
2171; CHECK:       @ %bb.0: @ %entry
2172; CHECK-NEXT:    vfma.f32 q0, q1, q2
2173; CHECK-NEXT:    vctp.32 r0
2174; CHECK-NEXT:    vpst
2175; CHECK-NEXT:    vmovt q1, q0
2176; CHECK-NEXT:    vmov q0, q1
2177; CHECK-NEXT:    bx lr
2178entry:
2179  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2180  %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
2181  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2182  ret <4 x float> %b
2183}
2184
; llvm.fma intrinsic form: a = fma(y, z, x), selected against %y on vctp16(%n).
; In the expected output the select is not folded into the multiply-add: an unpredicated
; vfma.f16 q0, q1, q2 is followed by a predicated vmovt q1, q0 and a vmov of q1 into q0.
2185define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
2186; CHECK-LABEL: fmai_v8f16_y:
2187; CHECK:       @ %bb.0: @ %entry
2188; CHECK-NEXT:    vfma.f16 q0, q1, q2
2189; CHECK-NEXT:    vctp.16 r0
2190; CHECK-NEXT:    vpst
2191; CHECK-NEXT:    vmovt q1, q0
2192; CHECK-NEXT:    vmov q0, q1
2193; CHECK-NEXT:    bx lr
2194entry:
2195  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2196  %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
2197  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2198  ret <8 x half> %b
2199}
2200
; Fast-math fmul + fadd form: a = (y * z) + x, selected against %y on vctp32(%n).
; The expected output fuses the pair into an unpredicated vfma.f32 q0, q1, q2; the select is
; then a predicated vmovt q1, q0 under vpst, followed by a vmov of q1 into q0.
2201define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
2202; CHECK-LABEL: fma_v4f32_y:
2203; CHECK:       @ %bb.0: @ %entry
2204; CHECK-NEXT:    vfma.f32 q0, q1, q2
2205; CHECK-NEXT:    vctp.32 r0
2206; CHECK-NEXT:    vpst
2207; CHECK-NEXT:    vmovt q1, q0
2208; CHECK-NEXT:    vmov q0, q1
2209; CHECK-NEXT:    bx lr
2210entry:
2211  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2212  %m = fmul fast <4 x float> %y, %z
2213  %a = fadd fast <4 x float> %m, %x
2214  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2215  ret <4 x float> %b
2216}
2217
; Fast-math fmul + fadd form: a = (y * z) + x, selected against %y on vctp16(%n).
; The expected output fuses the pair into an unpredicated vfma.f16 q0, q1, q2; the select is
; then a predicated vmovt q1, q0 under vpst, followed by a vmov of q1 into q0.
2218define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
2219; CHECK-LABEL: fma_v8f16_y:
2220; CHECK:       @ %bb.0: @ %entry
2221; CHECK-NEXT:    vfma.f16 q0, q1, q2
2222; CHECK-NEXT:    vctp.16 r0
2223; CHECK-NEXT:    vpst
2224; CHECK-NEXT:    vmovt q1, q0
2225; CHECK-NEXT:    vmov q0, q1
2226; CHECK-NEXT:    bx lr
2227entry:
2228  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2229  %m = fmul fast <8 x half> %y, %z
2230  %a = fadd fast <8 x half> %m, %x
2231  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2232  ret <8 x half> %b
2233}
2234
; Signed minimum written as icmp slt + select, then selected against %y on vctp32(%n).
; Expected codegen: one vmint.s32 under vpst writing q1, then a vmov of q1 into q0.
2235define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2236; CHECK-LABEL: icmp_slt_v4i32_y:
2237; CHECK:       @ %bb.0: @ %entry
2238; CHECK-NEXT:    vctp.32 r0
2239; CHECK-NEXT:    vpst
2240; CHECK-NEXT:    vmint.s32 q1, q0, q1
2241; CHECK-NEXT:    vmov q0, q1
2242; CHECK-NEXT:    bx lr
2243entry:
2244  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2245  %a1 = icmp slt <4 x i32> %x, %y
2246  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
2247  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2248  ret <4 x i32> %b
2249}
2250
; Signed minimum written as icmp slt + select, then selected against %y on vctp16(%n).
; Expected codegen: one vmint.s16 under vpst writing q1, then a vmov of q1 into q0.
2251define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2252; CHECK-LABEL: icmp_slt_v8i16_y:
2253; CHECK:       @ %bb.0: @ %entry
2254; CHECK-NEXT:    vctp.16 r0
2255; CHECK-NEXT:    vpst
2256; CHECK-NEXT:    vmint.s16 q1, q0, q1
2257; CHECK-NEXT:    vmov q0, q1
2258; CHECK-NEXT:    bx lr
2259entry:
2260  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2261  %a1 = icmp slt <8 x i16> %x, %y
2262  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
2263  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2264  ret <8 x i16> %b
2265}
2266
; Signed minimum written as icmp slt + select, then selected against %y on vctp8(%n).
; Expected codegen: one vmint.s8 under vpst writing q1, then a vmov of q1 into q0.
2267define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2268; CHECK-LABEL: icmp_slt_v16i8_y:
2269; CHECK:       @ %bb.0: @ %entry
2270; CHECK-NEXT:    vctp.8 r0
2271; CHECK-NEXT:    vpst
2272; CHECK-NEXT:    vmint.s8 q1, q0, q1
2273; CHECK-NEXT:    vmov q0, q1
2274; CHECK-NEXT:    bx lr
2275entry:
2276  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2277  %a1 = icmp slt <16 x i8> %x, %y
2278  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
2279  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2280  ret <16 x i8> %b
2281}
2282
; Signed maximum written as icmp sgt + select, then selected against %y on vctp32(%n).
; Expected codegen: one vmaxt.s32 under vpst writing q1, then a vmov of q1 into q0.
2283define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2284; CHECK-LABEL: icmp_sgt_v4i32_y:
2285; CHECK:       @ %bb.0: @ %entry
2286; CHECK-NEXT:    vctp.32 r0
2287; CHECK-NEXT:    vpst
2288; CHECK-NEXT:    vmaxt.s32 q1, q0, q1
2289; CHECK-NEXT:    vmov q0, q1
2290; CHECK-NEXT:    bx lr
2291entry:
2292  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2293  %a1 = icmp sgt <4 x i32> %x, %y
2294  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
2295  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2296  ret <4 x i32> %b
2297}
2298
; Signed maximum written as icmp sgt + select, then selected against %y on vctp16(%n).
; Expected codegen: one vmaxt.s16 under vpst writing q1, then a vmov of q1 into q0.
2299define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2300; CHECK-LABEL: icmp_sgt_v8i16_y:
2301; CHECK:       @ %bb.0: @ %entry
2302; CHECK-NEXT:    vctp.16 r0
2303; CHECK-NEXT:    vpst
2304; CHECK-NEXT:    vmaxt.s16 q1, q0, q1
2305; CHECK-NEXT:    vmov q0, q1
2306; CHECK-NEXT:    bx lr
2307entry:
2308  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2309  %a1 = icmp sgt <8 x i16> %x, %y
2310  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
2311  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2312  ret <8 x i16> %b
2313}
2314
; Signed maximum written as icmp sgt + select, then selected against %y on vctp8(%n).
; Expected codegen: one vmaxt.s8 under vpst writing q1, then a vmov of q1 into q0.
2315define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2316; CHECK-LABEL: icmp_sgt_v16i8_y:
2317; CHECK:       @ %bb.0: @ %entry
2318; CHECK-NEXT:    vctp.8 r0
2319; CHECK-NEXT:    vpst
2320; CHECK-NEXT:    vmaxt.s8 q1, q0, q1
2321; CHECK-NEXT:    vmov q0, q1
2322; CHECK-NEXT:    bx lr
2323entry:
2324  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2325  %a1 = icmp sgt <16 x i8> %x, %y
2326  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
2327  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2328  ret <16 x i8> %b
2329}
2330
; Unsigned minimum written as icmp ult + select, then selected against %y on vctp32(%n).
; Expected codegen: one vmint.u32 under vpst writing q1, then a vmov of q1 into q0.
2331define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2332; CHECK-LABEL: icmp_ult_v4i32_y:
2333; CHECK:       @ %bb.0: @ %entry
2334; CHECK-NEXT:    vctp.32 r0
2335; CHECK-NEXT:    vpst
2336; CHECK-NEXT:    vmint.u32 q1, q0, q1
2337; CHECK-NEXT:    vmov q0, q1
2338; CHECK-NEXT:    bx lr
2339entry:
2340  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2341  %a1 = icmp ult <4 x i32> %x, %y
2342  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
2343  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2344  ret <4 x i32> %b
2345}
2346
; Unsigned minimum written as icmp ult + select, then selected against %y on vctp16(%n).
; Expected codegen: one vmint.u16 under vpst writing q1, then a vmov of q1 into q0.
2347define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2348; CHECK-LABEL: icmp_ult_v8i16_y:
2349; CHECK:       @ %bb.0: @ %entry
2350; CHECK-NEXT:    vctp.16 r0
2351; CHECK-NEXT:    vpst
2352; CHECK-NEXT:    vmint.u16 q1, q0, q1
2353; CHECK-NEXT:    vmov q0, q1
2354; CHECK-NEXT:    bx lr
2355entry:
2356  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2357  %a1 = icmp ult <8 x i16> %x, %y
2358  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
2359  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2360  ret <8 x i16> %b
2361}
2362
; Unsigned minimum written as icmp ult + select, then selected against %y on vctp8(%n).
; Expected codegen: one vmint.u8 under vpst writing q1, then a vmov of q1 into q0.
2363define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2364; CHECK-LABEL: icmp_ult_v16i8_y:
2365; CHECK:       @ %bb.0: @ %entry
2366; CHECK-NEXT:    vctp.8 r0
2367; CHECK-NEXT:    vpst
2368; CHECK-NEXT:    vmint.u8 q1, q0, q1
2369; CHECK-NEXT:    vmov q0, q1
2370; CHECK-NEXT:    bx lr
2371entry:
2372  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2373  %a1 = icmp ult <16 x i8> %x, %y
2374  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
2375  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2376  ret <16 x i8> %b
2377}
2378
; Unsigned maximum written as icmp ugt + select, then selected against %y on vctp32(%n).
; Expected codegen: one vmaxt.u32 under vpst writing q1, then a vmov of q1 into q0.
2379define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2380; CHECK-LABEL: icmp_ugt_v4i32_y:
2381; CHECK:       @ %bb.0: @ %entry
2382; CHECK-NEXT:    vctp.32 r0
2383; CHECK-NEXT:    vpst
2384; CHECK-NEXT:    vmaxt.u32 q1, q0, q1
2385; CHECK-NEXT:    vmov q0, q1
2386; CHECK-NEXT:    bx lr
2387entry:
2388  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2389  %a1 = icmp ugt <4 x i32> %x, %y
2390  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
2391  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2392  ret <4 x i32> %b
2393}
2394
; Unsigned maximum written as icmp ugt + select, then selected against %y on vctp16(%n).
; Expected codegen: one vmaxt.u16 under vpst writing q1, then a vmov of q1 into q0.
2395define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2396; CHECK-LABEL: icmp_ugt_v8i16_y:
2397; CHECK:       @ %bb.0: @ %entry
2398; CHECK-NEXT:    vctp.16 r0
2399; CHECK-NEXT:    vpst
2400; CHECK-NEXT:    vmaxt.u16 q1, q0, q1
2401; CHECK-NEXT:    vmov q0, q1
2402; CHECK-NEXT:    bx lr
2403entry:
2404  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2405  %a1 = icmp ugt <8 x i16> %x, %y
2406  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
2407  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2408  ret <8 x i16> %b
2409}
2410
; Unsigned maximum written as icmp ugt + select, then selected against %y on vctp8(%n).
; Expected codegen: one vmaxt.u8 under vpst writing q1, then a vmov of q1 into q0.
2411define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2412; CHECK-LABEL: icmp_ugt_v16i8_y:
2413; CHECK:       @ %bb.0: @ %entry
2414; CHECK-NEXT:    vctp.8 r0
2415; CHECK-NEXT:    vpst
2416; CHECK-NEXT:    vmaxt.u8 q1, q0, q1
2417; CHECK-NEXT:    vmov q0, q1
2418; CHECK-NEXT:    bx lr
2419entry:
2420  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2421  %a1 = icmp ugt <16 x i8> %x, %y
2422  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
2423  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2424  ret <16 x i8> %b
2425}
2426
; Float minimum written as fast fcmp olt + select, then selected against %y on vctp32(%n).
; Expected codegen: one vminnmt.f32 under vpst writing q1, then a vmov of q1 into q0.
2427define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2428; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
2429; CHECK:       @ %bb.0: @ %entry
2430; CHECK-NEXT:    vctp.32 r0
2431; CHECK-NEXT:    vpst
2432; CHECK-NEXT:    vminnmt.f32 q1, q0, q1
2433; CHECK-NEXT:    vmov q0, q1
2434; CHECK-NEXT:    bx lr
2435entry:
2436  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2437  %a1 = fcmp fast olt <4 x float> %x, %y
2438  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
2439  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2440  ret <4 x float> %b
2441}
2442
; Half minimum written as fast fcmp olt + select, then selected against %y on vctp16(%n).
; Expected codegen: one vminnmt.f16 under vpst writing q1, then a vmov of q1 into q0.
2443define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2444; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
2445; CHECK:       @ %bb.0: @ %entry
2446; CHECK-NEXT:    vctp.16 r0
2447; CHECK-NEXT:    vpst
2448; CHECK-NEXT:    vminnmt.f16 q1, q0, q1
2449; CHECK-NEXT:    vmov q0, q1
2450; CHECK-NEXT:    bx lr
2451entry:
2452  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2453  %a1 = fcmp fast olt <8 x half> %x, %y
2454  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
2455  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2456  ret <8 x half> %b
2457}
2458
; Float maximum written as fast fcmp ogt + select, then selected against %y on vctp32(%n).
; Expected codegen: one vmaxnmt.f32 under vpst writing q1, then a vmov of q1 into q0.
2459define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
2460; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
2461; CHECK:       @ %bb.0: @ %entry
2462; CHECK-NEXT:    vctp.32 r0
2463; CHECK-NEXT:    vpst
2464; CHECK-NEXT:    vmaxnmt.f32 q1, q0, q1
2465; CHECK-NEXT:    vmov q0, q1
2466; CHECK-NEXT:    bx lr
2467entry:
2468  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2469  %a1 = fcmp fast ogt <4 x float> %x, %y
2470  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
2471  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
2472  ret <4 x float> %b
2473}
2474
; Half maximum written as fast fcmp ogt + select, then selected against %y on vctp16(%n).
; Expected codegen: one vmaxnmt.f16 under vpst writing q1, then a vmov of q1 into q0.
2475define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
2476; CHECK-LABEL: fcmp_fast_ogt_v8f16_y:
2477; CHECK:       @ %bb.0: @ %entry
2478; CHECK-NEXT:    vctp.16 r0
2479; CHECK-NEXT:    vpst
2480; CHECK-NEXT:    vmaxnmt.f16 q1, q0, q1
2481; CHECK-NEXT:    vmov q0, q1
2482; CHECK-NEXT:    bx lr
2483entry:
2484  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2485  %a1 = fcmp fast ogt <8 x half> %x, %y
2486  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
2487  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
2488  ret <8 x half> %b
2489}
2490
; Signed saturating add (llvm.sadd.sat) selected against %y on vctp32(%n).
; Expected codegen: one vqaddt.s32 under vpst writing q1, then a vmov of q1 into q0.
2491define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2492; CHECK-LABEL: sadd_sat_v4i32_y:
2493; CHECK:       @ %bb.0: @ %entry
2494; CHECK-NEXT:    vctp.32 r0
2495; CHECK-NEXT:    vpst
2496; CHECK-NEXT:    vqaddt.s32 q1, q0, q1
2497; CHECK-NEXT:    vmov q0, q1
2498; CHECK-NEXT:    bx lr
2499entry:
2500  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2501  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2502  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2503  ret <4 x i32> %b
2504}
2505
; Signed saturating add (llvm.sadd.sat) selected against %y on vctp16(%n).
; Expected codegen: one vqaddt.s16 under vpst writing q1, then a vmov of q1 into q0.
2506define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2507; CHECK-LABEL: sadd_sat_v8i16_y:
2508; CHECK:       @ %bb.0: @ %entry
2509; CHECK-NEXT:    vctp.16 r0
2510; CHECK-NEXT:    vpst
2511; CHECK-NEXT:    vqaddt.s16 q1, q0, q1
2512; CHECK-NEXT:    vmov q0, q1
2513; CHECK-NEXT:    bx lr
2514entry:
2515  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2516  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2517  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2518  ret <8 x i16> %b
2519}
2520
; Signed saturating add (llvm.sadd.sat) selected against %y on vctp8(%n).
; Expected codegen: one vqaddt.s8 under vpst writing q1, then a vmov of q1 into q0.
2521define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2522; CHECK-LABEL: sadd_sat_v16i8_y:
2523; CHECK:       @ %bb.0: @ %entry
2524; CHECK-NEXT:    vctp.8 r0
2525; CHECK-NEXT:    vpst
2526; CHECK-NEXT:    vqaddt.s8 q1, q0, q1
2527; CHECK-NEXT:    vmov q0, q1
2528; CHECK-NEXT:    bx lr
2529entry:
2530  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2531  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2532  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2533  ret <16 x i8> %b
2534}
2535
; Unsigned saturating add (llvm.uadd.sat) selected against %y on vctp32(%n).
; Expected codegen: one vqaddt.u32 under vpst writing q1, then a vmov of q1 into q0.
2536define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2537; CHECK-LABEL: uadd_sat_v4i32_y:
2538; CHECK:       @ %bb.0: @ %entry
2539; CHECK-NEXT:    vctp.32 r0
2540; CHECK-NEXT:    vpst
2541; CHECK-NEXT:    vqaddt.u32 q1, q0, q1
2542; CHECK-NEXT:    vmov q0, q1
2543; CHECK-NEXT:    bx lr
2544entry:
2545  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2546  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2547  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2548  ret <4 x i32> %b
2549}
2550
; Unsigned saturating add (llvm.uadd.sat) selected against %y on vctp16(%n).
; Expected codegen: one vqaddt.u16 under vpst writing q1, then a vmov of q1 into q0.
2551define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2552; CHECK-LABEL: uadd_sat_v8i16_y:
2553; CHECK:       @ %bb.0: @ %entry
2554; CHECK-NEXT:    vctp.16 r0
2555; CHECK-NEXT:    vpst
2556; CHECK-NEXT:    vqaddt.u16 q1, q0, q1
2557; CHECK-NEXT:    vmov q0, q1
2558; CHECK-NEXT:    bx lr
2559entry:
2560  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2561  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2562  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2563  ret <8 x i16> %b
2564}
2565
; Unsigned saturating add (llvm.uadd.sat) selected against %y on vctp8(%n).
; Expected codegen: one vqaddt.u8 under vpst writing q1, then a vmov of q1 into q0.
2566define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2567; CHECK-LABEL: uadd_sat_v16i8_y:
2568; CHECK:       @ %bb.0: @ %entry
2569; CHECK-NEXT:    vctp.8 r0
2570; CHECK-NEXT:    vpst
2571; CHECK-NEXT:    vqaddt.u8 q1, q0, q1
2572; CHECK-NEXT:    vmov q0, q1
2573; CHECK-NEXT:    bx lr
2574entry:
2575  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2576  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2577  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2578  ret <16 x i8> %b
2579}
2580
; Signed saturating subtract (llvm.ssub.sat) selected against %y on vctp32(%n).
; Expected codegen: one vqsubt.s32 under vpst writing q1, then a vmov of q1 into q0.
2581define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2582; CHECK-LABEL: ssub_sat_v4i32_y:
2583; CHECK:       @ %bb.0: @ %entry
2584; CHECK-NEXT:    vctp.32 r0
2585; CHECK-NEXT:    vpst
2586; CHECK-NEXT:    vqsubt.s32 q1, q0, q1
2587; CHECK-NEXT:    vmov q0, q1
2588; CHECK-NEXT:    bx lr
2589entry:
2590  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2591  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2592  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2593  ret <4 x i32> %b
2594}
2595
; Signed saturating subtract (llvm.ssub.sat) selected against %y on vctp16(%n).
; Expected codegen: one vqsubt.s16 under vpst writing q1, then a vmov of q1 into q0.
2596define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2597; CHECK-LABEL: ssub_sat_v8i16_y:
2598; CHECK:       @ %bb.0: @ %entry
2599; CHECK-NEXT:    vctp.16 r0
2600; CHECK-NEXT:    vpst
2601; CHECK-NEXT:    vqsubt.s16 q1, q0, q1
2602; CHECK-NEXT:    vmov q0, q1
2603; CHECK-NEXT:    bx lr
2604entry:
2605  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2606  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2607  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2608  ret <8 x i16> %b
2609}
2610
; Signed saturating subtract (llvm.ssub.sat) selected against %y on vctp8(%n).
; Expected codegen: one vqsubt.s8 under vpst writing q1, then a vmov of q1 into q0.
2611define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2612; CHECK-LABEL: ssub_sat_v16i8_y:
2613; CHECK:       @ %bb.0: @ %entry
2614; CHECK-NEXT:    vctp.8 r0
2615; CHECK-NEXT:    vpst
2616; CHECK-NEXT:    vqsubt.s8 q1, q0, q1
2617; CHECK-NEXT:    vmov q0, q1
2618; CHECK-NEXT:    bx lr
2619entry:
2620  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2621  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2622  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2623  ret <16 x i8> %b
2624}
2625
; Unsigned saturating subtract (llvm.usub.sat) selected against %y on vctp32(%n).
; Expected codegen: one vqsubt.u32 under vpst writing q1, then a vmov of q1 into q0.
2626define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2627; CHECK-LABEL: usub_sat_v4i32_y:
2628; CHECK:       @ %bb.0: @ %entry
2629; CHECK-NEXT:    vctp.32 r0
2630; CHECK-NEXT:    vpst
2631; CHECK-NEXT:    vqsubt.u32 q1, q0, q1
2632; CHECK-NEXT:    vmov q0, q1
2633; CHECK-NEXT:    bx lr
2634entry:
2635  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2636  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2637  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2638  ret <4 x i32> %b
2639}
2640
; Unsigned saturating subtract (llvm.usub.sat) selected against %y on vctp16(%n).
; Expected codegen: one vqsubt.u16 under vpst writing q1, then a vmov of q1 into q0.
2641define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2642; CHECK-LABEL: usub_sat_v8i16_y:
2643; CHECK:       @ %bb.0: @ %entry
2644; CHECK-NEXT:    vctp.16 r0
2645; CHECK-NEXT:    vpst
2646; CHECK-NEXT:    vqsubt.u16 q1, q0, q1
2647; CHECK-NEXT:    vmov q0, q1
2648; CHECK-NEXT:    bx lr
2649entry:
2650  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2651  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2652  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2653  ret <8 x i16> %b
2654}
2655
; Unsigned saturating subtract (llvm.usub.sat) selected against %y on vctp8(%n).
; Expected codegen: one vqsubt.u8 under vpst writing q1, then a vmov of q1 into q0.
2656define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2657; CHECK-LABEL: usub_sat_v16i8_y:
2658; CHECK:       @ %bb.0: @ %entry
2659; CHECK-NEXT:    vctp.8 r0
2660; CHECK-NEXT:    vpst
2661; CHECK-NEXT:    vqsubt.u8 q1, q0, q1
2662; CHECK-NEXT:    vmov q0, q1
2663; CHECK-NEXT:    bx lr
2664entry:
2665  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2666  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2667  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2668  ret <16 x i8> %b
2669}
2670
; Add of <4 x i32> %x with scalar %y broadcast to every lane, selected under
; the llvm.arm.mve.vctp32 predicate; lanes where the predicate is false take
; the broadcast value. The assertions expect a vdup.32 of r0 into q1, then a
; predicated vector-by-scalar vaddt.i32 (q1 = q0 + r0) and a vmov of q1 into q0.
2671define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2672; CHECK-LABEL: addqr_v4i32_y:
2673; CHECK:       @ %bb.0: @ %entry
2674; CHECK-NEXT:    vdup.32 q1, r0
2675; CHECK-NEXT:    vctp.32 r1
2676; CHECK-NEXT:    vpst
2677; CHECK-NEXT:    vaddt.i32 q1, q0, r0
2678; CHECK-NEXT:    vmov q0, q1
2679; CHECK-NEXT:    bx lr
2680entry:
2681  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2682  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2683  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2684  %a = add <4 x i32> %x, %ys
2685  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2686  ret <4 x i32> %b
2687}
2688
; Add of <8 x i16> %x with scalar %y broadcast to every lane, selected under
; the llvm.arm.mve.vctp16 predicate; lanes where the predicate is false take
; the broadcast value. The assertions expect a vdup.16 of r0 into q1, then a
; predicated vector-by-scalar vaddt.i16 (q1 = q0 + r0) and a vmov of q1 into q0.
2689define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2690; CHECK-LABEL: addqr_v8i16_y:
2691; CHECK:       @ %bb.0: @ %entry
2692; CHECK-NEXT:    vdup.16 q1, r0
2693; CHECK-NEXT:    vctp.16 r1
2694; CHECK-NEXT:    vpst
2695; CHECK-NEXT:    vaddt.i16 q1, q0, r0
2696; CHECK-NEXT:    vmov q0, q1
2697; CHECK-NEXT:    bx lr
2698entry:
2699  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2700  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2701  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2702  %a = add <8 x i16> %x, %ys
2703  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2704  ret <8 x i16> %b
2705}
2706
; Add of <16 x i8> %x with scalar %y broadcast to every lane, selected under
; the llvm.arm.mve.vctp8 predicate; lanes where the predicate is false take
; the broadcast value. The assertions expect a vdup.8 of r0 into q1, then a
; predicated vector-by-scalar vaddt.i8 (q1 = q0 + r0) and a vmov of q1 into q0.
2707define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2708; CHECK-LABEL: addqr_v16i8_y:
2709; CHECK:       @ %bb.0: @ %entry
2710; CHECK-NEXT:    vdup.8 q1, r0
2711; CHECK-NEXT:    vctp.8 r1
2712; CHECK-NEXT:    vpst
2713; CHECK-NEXT:    vaddt.i8 q1, q0, r0
2714; CHECK-NEXT:    vmov q0, q1
2715; CHECK-NEXT:    bx lr
2716entry:
2717  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2718  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2719  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2720  %a = add <16 x i8> %x, %ys
2721  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2722  ret <16 x i8> %b
2723}
2724
; Subtract of broadcast scalar %y from <4 x i32> %x, selected under the
; llvm.arm.mve.vctp32 predicate; lanes where the predicate is false take the
; broadcast value. The assertions expect a vdup.32 of r0 into q1, then a
; predicated vector-by-scalar vsubt.i32 (q1 = q0 - r0) and a vmov of q1 into q0.
2725define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2726; CHECK-LABEL: subqr_v4i32_y:
2727; CHECK:       @ %bb.0: @ %entry
2728; CHECK-NEXT:    vdup.32 q1, r0
2729; CHECK-NEXT:    vctp.32 r1
2730; CHECK-NEXT:    vpst
2731; CHECK-NEXT:    vsubt.i32 q1, q0, r0
2732; CHECK-NEXT:    vmov q0, q1
2733; CHECK-NEXT:    bx lr
2734entry:
2735  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2736  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2737  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2738  %a = sub <4 x i32> %x, %ys
2739  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2740  ret <4 x i32> %b
2741}
2742
; Subtract of broadcast scalar %y from <8 x i16> %x, selected under the
; llvm.arm.mve.vctp16 predicate; lanes where the predicate is false take the
; broadcast value. The assertions expect a vdup.16 of r0 into q1, then a
; predicated vector-by-scalar vsubt.i16 (q1 = q0 - r0) and a vmov of q1 into q0.
2743define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2744; CHECK-LABEL: subqr_v8i16_y:
2745; CHECK:       @ %bb.0: @ %entry
2746; CHECK-NEXT:    vdup.16 q1, r0
2747; CHECK-NEXT:    vctp.16 r1
2748; CHECK-NEXT:    vpst
2749; CHECK-NEXT:    vsubt.i16 q1, q0, r0
2750; CHECK-NEXT:    vmov q0, q1
2751; CHECK-NEXT:    bx lr
2752entry:
2753  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2754  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2755  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2756  %a = sub <8 x i16> %x, %ys
2757  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2758  ret <8 x i16> %b
2759}
2760
; Subtract of broadcast scalar %y from <16 x i8> %x, selected under the
; llvm.arm.mve.vctp8 predicate; lanes where the predicate is false take the
; broadcast value. The assertions expect a vdup.8 of r0 into q1, then a
; predicated vector-by-scalar vsubt.i8 (q1 = q0 - r0) and a vmov of q1 into q0.
2761define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2762; CHECK-LABEL: subqr_v16i8_y:
2763; CHECK:       @ %bb.0: @ %entry
2764; CHECK-NEXT:    vdup.8 q1, r0
2765; CHECK-NEXT:    vctp.8 r1
2766; CHECK-NEXT:    vpst
2767; CHECK-NEXT:    vsubt.i8 q1, q0, r0
2768; CHECK-NEXT:    vmov q0, q1
2769; CHECK-NEXT:    bx lr
2770entry:
2771  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2772  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2773  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2774  %a = sub <16 x i8> %x, %ys
2775  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2776  ret <16 x i8> %b
2777}
2778
; Multiply of <4 x i32> %x by scalar %y broadcast to every lane, selected under
; the llvm.arm.mve.vctp32 predicate; lanes where the predicate is false take
; the broadcast value. The assertions expect a vdup.32 of r0 into q1, then a
; predicated vector-by-scalar vmult.i32 (q1 = q0 * r0) and a vmov of q1 into q0.
2779define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2780; CHECK-LABEL: mulqr_v4i32_y:
2781; CHECK:       @ %bb.0: @ %entry
2782; CHECK-NEXT:    vdup.32 q1, r0
2783; CHECK-NEXT:    vctp.32 r1
2784; CHECK-NEXT:    vpst
2785; CHECK-NEXT:    vmult.i32 q1, q0, r0
2786; CHECK-NEXT:    vmov q0, q1
2787; CHECK-NEXT:    bx lr
2788entry:
2789  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2790  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2791  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2792  %a = mul <4 x i32> %x, %ys
2793  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2794  ret <4 x i32> %b
2795}
2796
; Multiply of <8 x i16> %x by scalar %y broadcast to every lane, selected under
; the llvm.arm.mve.vctp16 predicate; lanes where the predicate is false take
; the broadcast value. The assertions expect a vdup.16 of r0 into q1, then a
; predicated vector-by-scalar vmult.i16 (q1 = q0 * r0) and a vmov of q1 into q0.
2797define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2798; CHECK-LABEL: mulqr_v8i16_y:
2799; CHECK:       @ %bb.0: @ %entry
2800; CHECK-NEXT:    vdup.16 q1, r0
2801; CHECK-NEXT:    vctp.16 r1
2802; CHECK-NEXT:    vpst
2803; CHECK-NEXT:    vmult.i16 q1, q0, r0
2804; CHECK-NEXT:    vmov q0, q1
2805; CHECK-NEXT:    bx lr
2806entry:
2807  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2808  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2809  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2810  %a = mul <8 x i16> %x, %ys
2811  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2812  ret <8 x i16> %b
2813}
2814
; Multiply of <16 x i8> %x by scalar %y broadcast to every lane, selected under
; the llvm.arm.mve.vctp8 predicate; lanes where the predicate is false take
; the broadcast value. The assertions expect a vdup.8 of r0 into q1, then a
; predicated vector-by-scalar vmult.i8 (q1 = q0 * r0) and a vmov of q1 into q0.
2815define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2816; CHECK-LABEL: mulqr_v16i8_y:
2817; CHECK:       @ %bb.0: @ %entry
2818; CHECK-NEXT:    vdup.8 q1, r0
2819; CHECK-NEXT:    vctp.8 r1
2820; CHECK-NEXT:    vpst
2821; CHECK-NEXT:    vmult.i8 q1, q0, r0
2822; CHECK-NEXT:    vmov q0, q1
2823; CHECK-NEXT:    bx lr
2824entry:
2825  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2826  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2827  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2828  %a = mul <16 x i8> %x, %ys
2829  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2830  ret <16 x i8> %b
2831}
2832
; Floating-point add of <4 x float> %x with scalar %y broadcast to every lane,
; selected under the llvm.arm.mve.vctp32 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect the scalar to be
; moved from s4 into r1, duplicated into q1 with vdup.32, and then a predicated
; vector-by-scalar vaddt.f32 (q1 = q0 + r1) followed by a vmov of q1 into q0.
2833define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2834; CHECK-LABEL: faddqr_v4f32_y:
2835; CHECK:       @ %bb.0: @ %entry
2836; CHECK-NEXT:    vmov r1, s4
2837; CHECK-NEXT:    vctp.32 r0
2838; CHECK-NEXT:    vdup.32 q1, r1
2839; CHECK-NEXT:    vpst
2840; CHECK-NEXT:    vaddt.f32 q1, q0, r1
2841; CHECK-NEXT:    vmov q0, q1
2842; CHECK-NEXT:    bx lr
2843entry:
2844  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2845  %i = insertelement <4 x float> undef, float %y, i32 0
2846  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2847  %a = fadd <4 x float> %x, %ys
2848  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2849  ret <4 x float> %b
2850}
2851
; Floating-point add of <8 x half> %x with scalar %y broadcast to every lane,
; selected under the llvm.arm.mve.vctp16 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect the scalar to be
; moved from s4 into r1 (vmov.f16), duplicated into q1 with vdup.16, and then a
; predicated vector-by-scalar vaddt.f16 (q1 = q0 + r1) followed by a vmov of q1
; into q0.
2852define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2853; CHECK-LABEL: faddqr_v8f16_y:
2854; CHECK:       @ %bb.0: @ %entry
2855; CHECK-NEXT:    vmov.f16 r1, s4
2856; CHECK-NEXT:    vctp.16 r0
2857; CHECK-NEXT:    vdup.16 q1, r1
2858; CHECK-NEXT:    vpst
2859; CHECK-NEXT:    vaddt.f16 q1, q0, r1
2860; CHECK-NEXT:    vmov q0, q1
2861; CHECK-NEXT:    bx lr
2862entry:
2863  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2864  %i = insertelement <8 x half> undef, half %y, i32 0
2865  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2866  %a = fadd <8 x half> %x, %ys
2867  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2868  ret <8 x half> %b
2869}
2870
; Floating-point subtract of broadcast scalar %y from <4 x float> %x, selected
; under the llvm.arm.mve.vctp32 predicate; lanes where the predicate is false
; take the broadcast value. The assertions expect the scalar to be moved from
; s4 into r1, duplicated into q1 with vdup.32, and then a predicated
; vector-by-scalar vsubt.f32 (q1 = q0 - r1) followed by a vmov of q1 into q0.
2871define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2872; CHECK-LABEL: fsubqr_v4f32_y:
2873; CHECK:       @ %bb.0: @ %entry
2874; CHECK-NEXT:    vmov r1, s4
2875; CHECK-NEXT:    vctp.32 r0
2876; CHECK-NEXT:    vdup.32 q1, r1
2877; CHECK-NEXT:    vpst
2878; CHECK-NEXT:    vsubt.f32 q1, q0, r1
2879; CHECK-NEXT:    vmov q0, q1
2880; CHECK-NEXT:    bx lr
2881entry:
2882  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2883  %i = insertelement <4 x float> undef, float %y, i32 0
2884  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2885  %a = fsub <4 x float> %x, %ys
2886  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2887  ret <4 x float> %b
2888}
2889
; Floating-point subtract of broadcast scalar %y from <8 x half> %x, selected
; under the llvm.arm.mve.vctp16 predicate; lanes where the predicate is false
; take the broadcast value. The assertions expect the scalar to be moved from
; s4 into r1 (vmov.f16), duplicated into q1 with vdup.16, and then a predicated
; vector-by-scalar vsubt.f16 (q1 = q0 - r1) followed by a vmov of q1 into q0.
2890define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2891; CHECK-LABEL: fsubqr_v8f16_y:
2892; CHECK:       @ %bb.0: @ %entry
2893; CHECK-NEXT:    vmov.f16 r1, s4
2894; CHECK-NEXT:    vctp.16 r0
2895; CHECK-NEXT:    vdup.16 q1, r1
2896; CHECK-NEXT:    vpst
2897; CHECK-NEXT:    vsubt.f16 q1, q0, r1
2898; CHECK-NEXT:    vmov q0, q1
2899; CHECK-NEXT:    bx lr
2900entry:
2901  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2902  %i = insertelement <8 x half> undef, half %y, i32 0
2903  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2904  %a = fsub <8 x half> %x, %ys
2905  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2906  ret <8 x half> %b
2907}
2908
; Floating-point multiply of <4 x float> %x by scalar %y broadcast to every
; lane, selected under the llvm.arm.mve.vctp32 predicate; lanes where the
; predicate is false take the broadcast value. The assertions expect the scalar
; to be moved from s4 into r1, duplicated into q1 with vdup.32, and then a
; predicated vector-by-scalar vmult.f32 (q1 = q0 * r1) followed by a vmov of q1
; into q0.
2909define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2910; CHECK-LABEL: fmulqr_v4f32_y:
2911; CHECK:       @ %bb.0: @ %entry
2912; CHECK-NEXT:    vmov r1, s4
2913; CHECK-NEXT:    vctp.32 r0
2914; CHECK-NEXT:    vdup.32 q1, r1
2915; CHECK-NEXT:    vpst
2916; CHECK-NEXT:    vmult.f32 q1, q0, r1
2917; CHECK-NEXT:    vmov q0, q1
2918; CHECK-NEXT:    bx lr
2919entry:
2920  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2921  %i = insertelement <4 x float> undef, float %y, i32 0
2922  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2923  %a = fmul <4 x float> %x, %ys
2924  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2925  ret <4 x float> %b
2926}
2927
; Floating-point multiply of <8 x half> %x by scalar %y broadcast to every
; lane, selected under the llvm.arm.mve.vctp16 predicate; lanes where the
; predicate is false take the broadcast value. The assertions expect the scalar
; to be moved from s4 into r1 (vmov.f16), duplicated into q1 with vdup.16, and
; then a predicated vector-by-scalar vmult.f16 (q1 = q0 * r1) followed by a
; vmov of q1 into q0.
2928define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2929; CHECK-LABEL: fmulqr_v8f16_y:
2930; CHECK:       @ %bb.0: @ %entry
2931; CHECK-NEXT:    vmov.f16 r1, s4
2932; CHECK-NEXT:    vctp.16 r0
2933; CHECK-NEXT:    vdup.16 q1, r1
2934; CHECK-NEXT:    vpst
2935; CHECK-NEXT:    vmult.f16 q1, q0, r1
2936; CHECK-NEXT:    vmov q0, q1
2937; CHECK-NEXT:    bx lr
2938entry:
2939  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2940  %i = insertelement <8 x half> undef, half %y, i32 0
2941  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2942  %a = fmul <8 x half> %x, %ys
2943  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2944  ret <8 x half> %b
2945}
2946
; Signed saturating add of <4 x i32> %x and scalar %y broadcast to every lane,
; selected under the llvm.arm.mve.vctp32 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect a vdup.32 of r0 into
; q1, then a predicated vector-by-scalar vqaddt.s32 (q1 from q0 and r0) and a
; vmov of q1 into q0.
2947define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2948; CHECK-LABEL: sadd_satqr_v4i32_y:
2949; CHECK:       @ %bb.0: @ %entry
2950; CHECK-NEXT:    vdup.32 q1, r0
2951; CHECK-NEXT:    vctp.32 r1
2952; CHECK-NEXT:    vpst
2953; CHECK-NEXT:    vqaddt.s32 q1, q0, r0
2954; CHECK-NEXT:    vmov q0, q1
2955; CHECK-NEXT:    bx lr
2956entry:
2957  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2958  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2959  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2960  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2961  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2962  ret <4 x i32> %b
2963}
2964
; Signed saturating add of <8 x i16> %x and scalar %y broadcast to every lane,
; selected under the llvm.arm.mve.vctp16 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect a vdup.16 of r0 into
; q1, then a predicated vector-by-scalar vqaddt.s16 (q1 from q0 and r0) and a
; vmov of q1 into q0.
2965define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2966; CHECK-LABEL: sadd_satqr_v8i16_y:
2967; CHECK:       @ %bb.0: @ %entry
2968; CHECK-NEXT:    vdup.16 q1, r0
2969; CHECK-NEXT:    vctp.16 r1
2970; CHECK-NEXT:    vpst
2971; CHECK-NEXT:    vqaddt.s16 q1, q0, r0
2972; CHECK-NEXT:    vmov q0, q1
2973; CHECK-NEXT:    bx lr
2974entry:
2975  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2976  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2977  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2978  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
2979  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2980  ret <8 x i16> %b
2981}
2982
; Signed saturating add of <16 x i8> %x and scalar %y broadcast to every lane,
; selected under the llvm.arm.mve.vctp8 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect a vdup.8 of r0 into
; q1, then a predicated vector-by-scalar vqaddt.s8 (q1 from q0 and r0) and a
; vmov of q1 into q0.
2983define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2984; CHECK-LABEL: sadd_satqr_v16i8_y:
2985; CHECK:       @ %bb.0: @ %entry
2986; CHECK-NEXT:    vdup.8 q1, r0
2987; CHECK-NEXT:    vctp.8 r1
2988; CHECK-NEXT:    vpst
2989; CHECK-NEXT:    vqaddt.s8 q1, q0, r0
2990; CHECK-NEXT:    vmov q0, q1
2991; CHECK-NEXT:    bx lr
2992entry:
2993  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
2994  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2995  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2996  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
2997  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2998  ret <16 x i8> %b
2999}
3000
; Unsigned saturating add of <4 x i32> %x and scalar %y broadcast to every
; lane, selected under the llvm.arm.mve.vctp32 predicate; lanes where the
; predicate is false take the broadcast value. The assertions expect a vdup.32
; of r0 into q1, then a predicated vector-by-scalar vqaddt.u32 (q1 from q0 and
; r0) and a vmov of q1 into q0.
3001define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
3002; CHECK-LABEL: uadd_satqr_v4i32_y:
3003; CHECK:       @ %bb.0: @ %entry
3004; CHECK-NEXT:    vdup.32 q1, r0
3005; CHECK-NEXT:    vctp.32 r1
3006; CHECK-NEXT:    vpst
3007; CHECK-NEXT:    vqaddt.u32 q1, q0, r0
3008; CHECK-NEXT:    vmov q0, q1
3009; CHECK-NEXT:    bx lr
3010entry:
3011  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3012  %i = insertelement <4 x i32> undef, i32 %y, i32 0
3013  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3014  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3015  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
3016  ret <4 x i32> %b
3017}
3018
; Unsigned saturating add of <8 x i16> %x and scalar %y broadcast to every
; lane, selected under the llvm.arm.mve.vctp16 predicate; lanes where the
; predicate is false take the broadcast value. The assertions expect a vdup.16
; of r0 into q1, then a predicated vector-by-scalar vqaddt.u16 (q1 from q0 and
; r0) and a vmov of q1 into q0.
3019define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3020; CHECK-LABEL: uadd_satqr_v8i16_y:
3021; CHECK:       @ %bb.0: @ %entry
3022; CHECK-NEXT:    vdup.16 q1, r0
3023; CHECK-NEXT:    vctp.16 r1
3024; CHECK-NEXT:    vpst
3025; CHECK-NEXT:    vqaddt.u16 q1, q0, r0
3026; CHECK-NEXT:    vmov q0, q1
3027; CHECK-NEXT:    bx lr
3028entry:
3029  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3030  %i = insertelement <8 x i16> undef, i16 %y, i32 0
3031  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3032  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3033  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
3034  ret <8 x i16> %b
3035}
3036
; Unsigned saturating add of <16 x i8> %x and scalar %y broadcast to every
; lane, selected under the llvm.arm.mve.vctp8 predicate; lanes where the
; predicate is false take the broadcast value. The assertions expect a vdup.8
; of r0 into q1, then a predicated vector-by-scalar vqaddt.u8 (q1 from q0 and
; r0) and a vmov of q1 into q0.
3037define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3038; CHECK-LABEL: uadd_satqr_v16i8_y:
3039; CHECK:       @ %bb.0: @ %entry
3040; CHECK-NEXT:    vdup.8 q1, r0
3041; CHECK-NEXT:    vctp.8 r1
3042; CHECK-NEXT:    vpst
3043; CHECK-NEXT:    vqaddt.u8 q1, q0, r0
3044; CHECK-NEXT:    vmov q0, q1
3045; CHECK-NEXT:    bx lr
3046entry:
3047  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3048  %i = insertelement <16 x i8> undef, i8 %y, i32 0
3049  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3050  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3051  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
3052  ret <16 x i8> %b
3053}
3054
; Signed saturating subtract of broadcast scalar %y from <4 x i32> %x, selected
; under the llvm.arm.mve.vctp32 predicate; lanes where the predicate is false
; take the broadcast value. The assertions expect a vdup.32 of r0 into q1, then
; a predicated vector-by-scalar vqsubt.s32 (q1 from q0 and r0) and a vmov of q1
; into q0.
3055define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
3056; CHECK-LABEL: ssub_satqr_v4i32_y:
3057; CHECK:       @ %bb.0: @ %entry
3058; CHECK-NEXT:    vdup.32 q1, r0
3059; CHECK-NEXT:    vctp.32 r1
3060; CHECK-NEXT:    vpst
3061; CHECK-NEXT:    vqsubt.s32 q1, q0, r0
3062; CHECK-NEXT:    vmov q0, q1
3063; CHECK-NEXT:    bx lr
3064entry:
3065  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3066  %i = insertelement <4 x i32> undef, i32 %y, i32 0
3067  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3068  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3069  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
3070  ret <4 x i32> %b
3071}
3072
; Signed saturating subtract of broadcast scalar %y from <8 x i16> %x, selected
; under the llvm.arm.mve.vctp16 predicate; lanes where the predicate is false
; take the broadcast value. The assertions expect a vdup.16 of r0 into q1, then
; a predicated vector-by-scalar vqsubt.s16 (q1 from q0 and r0) and a vmov of q1
; into q0.
3073define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3074; CHECK-LABEL: ssub_satqr_v8i16_y:
3075; CHECK:       @ %bb.0: @ %entry
3076; CHECK-NEXT:    vdup.16 q1, r0
3077; CHECK-NEXT:    vctp.16 r1
3078; CHECK-NEXT:    vpst
3079; CHECK-NEXT:    vqsubt.s16 q1, q0, r0
3080; CHECK-NEXT:    vmov q0, q1
3081; CHECK-NEXT:    bx lr
3082entry:
3083  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3084  %i = insertelement <8 x i16> undef, i16 %y, i32 0
3085  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3086  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3087  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
3088  ret <8 x i16> %b
3089}
3090
; Signed saturating subtract of broadcast scalar %y from <16 x i8> %x, selected
; under the llvm.arm.mve.vctp8 predicate; lanes where the predicate is false
; take the broadcast value. The assertions expect a vdup.8 of r0 into q1, then
; a predicated vector-by-scalar vqsubt.s8 (q1 from q0 and r0) and a vmov of q1
; into q0.
3091define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3092; CHECK-LABEL: ssub_satqr_v16i8_y:
3093; CHECK:       @ %bb.0: @ %entry
3094; CHECK-NEXT:    vdup.8 q1, r0
3095; CHECK-NEXT:    vctp.8 r1
3096; CHECK-NEXT:    vpst
3097; CHECK-NEXT:    vqsubt.s8 q1, q0, r0
3098; CHECK-NEXT:    vmov q0, q1
3099; CHECK-NEXT:    bx lr
3100entry:
3101  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3102  %i = insertelement <16 x i8> undef, i8 %y, i32 0
3103  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3104  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3105  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
3106  ret <16 x i8> %b
3107}
3108
; Unsigned saturating subtract of broadcast scalar %y from <4 x i32> %x,
; selected under the llvm.arm.mve.vctp32 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect a vdup.32 of r0 into
; q1, then a predicated vector-by-scalar vqsubt.u32 (q1 from q0 and r0) and a
; vmov of q1 into q0.
3109define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
3110; CHECK-LABEL: usub_satqr_v4i32_y:
3111; CHECK:       @ %bb.0: @ %entry
3112; CHECK-NEXT:    vdup.32 q1, r0
3113; CHECK-NEXT:    vctp.32 r1
3114; CHECK-NEXT:    vpst
3115; CHECK-NEXT:    vqsubt.u32 q1, q0, r0
3116; CHECK-NEXT:    vmov q0, q1
3117; CHECK-NEXT:    bx lr
3118entry:
3119  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3120  %i = insertelement <4 x i32> undef, i32 %y, i32 0
3121  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
3122  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
3123  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
3124  ret <4 x i32> %b
3125}
3126
; Unsigned saturating subtract of broadcast scalar %y from <8 x i16> %x,
; selected under the llvm.arm.mve.vctp16 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect a vdup.16 of r0 into
; q1, then a predicated vector-by-scalar vqsubt.u16 (q1 from q0 and r0) and a
; vmov of q1 into q0.
3127define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
3128; CHECK-LABEL: usub_satqr_v8i16_y:
3129; CHECK:       @ %bb.0: @ %entry
3130; CHECK-NEXT:    vdup.16 q1, r0
3131; CHECK-NEXT:    vctp.16 r1
3132; CHECK-NEXT:    vpst
3133; CHECK-NEXT:    vqsubt.u16 q1, q0, r0
3134; CHECK-NEXT:    vmov q0, q1
3135; CHECK-NEXT:    bx lr
3136entry:
3137  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3138  %i = insertelement <8 x i16> undef, i16 %y, i32 0
3139  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
3140  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
3141  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
3142  ret <8 x i16> %b
3143}
3144
; Unsigned saturating subtract of broadcast scalar %y from <16 x i8> %x,
; selected under the llvm.arm.mve.vctp8 predicate; lanes where the predicate
; is false take the broadcast value. The assertions expect a vdup.8 of r0 into
; q1, then a predicated vector-by-scalar vqsubt.u8 (q1 from q0 and r0) and a
; vmov of q1 into q0.
3145define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
3146; CHECK-LABEL: usub_satqr_v16i8_y:
3147; CHECK:       @ %bb.0: @ %entry
3148; CHECK-NEXT:    vdup.8 q1, r0
3149; CHECK-NEXT:    vctp.8 r1
3150; CHECK-NEXT:    vpst
3151; CHECK-NEXT:    vqsubt.u8 q1, q0, r0
3152; CHECK-NEXT:    vmov q0, q1
3153; CHECK-NEXT:    bx lr
3154entry:
3155  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y: insert into lane 0, then shuffle with an all-zero mask.
3156  %i = insertelement <16 x i8> undef, i8 %y, i32 0
3157  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
3158  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
3159  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
3160  ret <16 x i8> %b
3161}
3162
; Saturating add/subtract intrinsics (signed and unsigned) called by the
; *_sat_* tests, one declaration per vector type.
3163declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
3164declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
3165declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
3166declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
3167declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
3168declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
3169declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
3170declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
3171declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
3172declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
3173declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
3174declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
3175
; Fused multiply-add intrinsics.
; NOTE(review): not referenced in the final portion of this file; presumably
; used by tests earlier in the file - confirm before removing.
3176declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
3177declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
3178
; MVE intrinsics that turn an i32 count into the <N x i1> mask each test uses
; as its select condition (8-, 16- and 32-bit lane variants).
3179declare <16 x i1> @llvm.arm.mve.vctp8(i32)
3180declare <8 x i1> @llvm.arm.mve.vctp16(i32)
3181declare <4 x i1> @llvm.arm.mve.vctp32(i32)
3182