; xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt_anpm.ll (revision 1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Unpredicated @llvm.arm.mve.vcvta on <8 x half> with flag operand i32 0; expects a single vcvta.s16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtaq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvta.v8i16.v8f16(i32 0, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvta on <4 x float> with flag operand i32 0; expects a single vcvta.s32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtaq_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvta.v4i32.v4f32(i32 0, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvta on <8 x half> with flag operand i32 1; expects a single vcvta.u16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtaq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtaq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvta.v8i16.v8f16(i32 1, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvta on <4 x float> with flag operand i32 1; expects a single vcvta.u32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtaq_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvta.v4i32.v4f32(i32 1, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvtm on <8 x half> with flag operand i32 0; expects a single vcvtm.s16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvtm.v8i16.v8f16(i32 0, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvtm on <4 x float> with flag operand i32 0; expects a single vcvtm.s32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtmq_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvtm.v4i32.v4f32(i32 0, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvtm on <8 x half> with flag operand i32 1; expects a single vcvtm.u16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvtm.v8i16.v8f16(i32 1, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvtm on <4 x float> with flag operand i32 1; expects a single vcvtm.u32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtmq_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvtm.v4i32.v4f32(i32 1, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvtn on <8 x half> with flag operand i32 0; expects a single vcvtn.s16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvtn.v8i16.v8f16(i32 0, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvtn on <4 x float> with flag operand i32 0; expects a single vcvtn.s32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtnq_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvtn.v4i32.v4f32(i32 0, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvtn on <8 x half> with flag operand i32 1; expects a single vcvtn.u16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvtn.v8i16.v8f16(i32 1, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvtn on <4 x float> with flag operand i32 1; expects a single vcvtn.u32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtnq_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvtn.v4i32.v4f32(i32 1, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvtp on <8 x half> with flag operand i32 0; expects a single vcvtp.s16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvtp.v8i16.v8f16(i32 0, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvtp on <4 x float> with flag operand i32 0; expects a single vcvtp.s32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtpq_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvtp.v4i32.v4f32(i32 0, <4 x float> %a)
  ret <4 x i32> %0
}

; Unpredicated @llvm.arm.mve.vcvtp on <8 x half> with flag operand i32 1; expects a single vcvtp.u16.f16.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vcvtp.v8i16.v8f16(i32 1, <8 x half> %a)
  ret <8 x i16> %0
}

; Unpredicated @llvm.arm.mve.vcvtp on <4 x float> with flag operand i32 1; expects a single vcvtp.u32.f32.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) {
; CHECK-LABEL: test_vcvtpq_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vcvtp.v4i32.v4f32(i32 1, <4 x float> %a)
  ret <4 x i32> %0
}

; Predicated @llvm.arm.mve.vcvta.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtat.s16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtaq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_m_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.s16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvta.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtat.s32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtaq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_m_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.s32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvta.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtat.u16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtaq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_m_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.u16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvta.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtat.u32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtaq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_m_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.u32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvta.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtmt.s16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtmq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_m_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.s16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtm.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtmt.s32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtmq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_m_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.s32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtm.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtmt.u16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtmq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_m_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.u16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtm.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtmt.u32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtmq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_m_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.u32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtm.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtnt.s16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtnq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_m_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.s16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtn.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtnt.s32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtnq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_m_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.s32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtn.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtnt.u16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtnq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_m_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.u16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtn.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtnt.u32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtnq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_m_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.u32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtn.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtpt.s16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtpq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_m_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.s16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtp.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with an explicit %inactive operand and flag i32 0; expects vmsr/vpst then vcvtpt.s32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtpq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_m_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.s32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtp.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtpt.u16.f16 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtpq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_m_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.u16.f16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtp.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> %inactive, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with an explicit %inactive operand and flag i32 1; expects vmsr/vpst then vcvtpt.u32.f32 q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtpq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_m_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.u32.f32 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtp.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> %inactive, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtat.s16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtaq_x_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_x_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvta.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtat.s32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtaq_x_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_x_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvta.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtat.u16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtaq_x_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_x_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvta.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvta.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtat.u32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtaq_x_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtaq_x_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtat.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvta.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtmt.s16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtmq_x_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_x_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtm.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtmt.s32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtmq_x_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_x_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtm.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtmt.u16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtmq_x_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_x_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtm.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtm.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtmt.u32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtmq_x_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtmq_x_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtmt.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtm.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtnt.s16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtnq_x_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_x_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtn.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtnt.s32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtnq_x_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_x_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtn.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtnt.u16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtnq_x_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_x_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtn.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtn.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtnt.u32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtnq_x_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtnq_x_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtnt.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtn.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtpt.s16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtpq_x_s16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_x_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtp.predicated.v8i16.v8f16.v8i1(i32 0, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with undef as the inactive operand and flag i32 0; expects vmsr/vpst then vcvtpt.s32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtpq_x_s32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_x_s32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.s32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtp.predicated.v4i32.v4f32.v4i1(i32 0, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtpt.u16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @test_vcvtpq_x_u16_f16(<8 x half> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_x_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vcvtp.predicated.v8i16.v8f16.v8i1(i32 1, <8 x i16> undef, <8 x half> %a, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated @llvm.arm.mve.vcvtp.predicated with undef as the inactive operand and flag i32 1; expects vmsr/vpst then vcvtpt.u32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @test_vcvtpq_x_u32_f32(<4 x float> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vcvtpq_x_u32_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcvtpt.u32.f32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vcvtp.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicate helpers: turn the i32 produced from %p into the <N x i1> mask passed to the predicated calls.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; Unpredicated conversions: (i32 flag, source vector). The tests pass flag 0 for the .s forms and 1 for the .u forms.
declare <8 x i16> @llvm.arm.mve.vcvta.v8i16.v8f16(i32, <8 x half>)
declare <4 x i32> @llvm.arm.mve.vcvta.v4i32.v4f32(i32, <4 x float>)
declare <8 x i16> @llvm.arm.mve.vcvtm.v8i16.v8f16(i32, <8 x half>)
declare <4 x i32> @llvm.arm.mve.vcvtm.v4i32.v4f32(i32, <4 x float>)
declare <8 x i16> @llvm.arm.mve.vcvtn.v8i16.v8f16(i32, <8 x half>)
declare <4 x i32> @llvm.arm.mve.vcvtn.v4i32.v4f32(i32, <4 x float>)
declare <8 x i16> @llvm.arm.mve.vcvtp.v8i16.v8f16(i32, <8 x half>)
declare <4 x i32> @llvm.arm.mve.vcvtp.v4i32.v4f32(i32, <4 x float>)

; Predicated conversions: (i32 flag, inactive vector, source vector, predicate mask).
declare <8 x i16> @llvm.arm.mve.vcvta.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcvta.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, <4 x i1>)
declare <8 x i16> @llvm.arm.mve.vcvtm.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcvtm.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, <4 x i1>)
declare <8 x i16> @llvm.arm.mve.vcvtn.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcvtn.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, <4 x i1>)
declare <8 x i16> @llvm.arm.mve.vcvtp.predicated.v8i16.v8f16.v8i1(i32, <8 x i16>, <8 x half>, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vcvtp.predicated.v4i32.v4f32.v4i1(i32, <4 x i32>, <4 x float>, <4 x i1>)