xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vcvt.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
4
; Signed integer to float: sitofp <4 x i32> to <4 x float>.
; Expected code for the +mve,+fullfp16 run is four scalar vcvt.f32.s32
; instructions, one per s-register (s3 down to s0). For the +mve.fp run it
; is a single vector vcvt.f32.s32 q0, q0.
5define arm_aapcs_vfpcc <4 x float> @foo_float_int32(<4 x i32> %src) {
6; CHECK-MVE-LABEL: foo_float_int32:
7; CHECK-MVE:       @ %bb.0: @ %entry
8; CHECK-MVE-NEXT:    vcvt.f32.s32 s3, s3
9; CHECK-MVE-NEXT:    vcvt.f32.s32 s2, s2
10; CHECK-MVE-NEXT:    vcvt.f32.s32 s1, s1
11; CHECK-MVE-NEXT:    vcvt.f32.s32 s0, s0
12; CHECK-MVE-NEXT:    bx lr
13;
14; CHECK-MVEFP-LABEL: foo_float_int32:
15; CHECK-MVEFP:       @ %bb.0: @ %entry
16; CHECK-MVEFP-NEXT:    vcvt.f32.s32 q0, q0
17; CHECK-MVEFP-NEXT:    bx lr
18entry:
19  %out = sitofp <4 x i32> %src to <4 x float>
20  ret <4 x float> %out
21}
22
; Unsigned integer to float: uitofp <4 x i32> to <4 x float>.
; Expected code for the +mve,+fullfp16 run is four scalar vcvt.f32.u32
; instructions, one per s-register (s3 down to s0). For the +mve.fp run it
; is a single vector vcvt.f32.u32 q0, q0.
23define arm_aapcs_vfpcc <4 x float> @foo_float_uint32(<4 x i32> %src) {
24; CHECK-MVE-LABEL: foo_float_uint32:
25; CHECK-MVE:       @ %bb.0: @ %entry
26; CHECK-MVE-NEXT:    vcvt.f32.u32 s3, s3
27; CHECK-MVE-NEXT:    vcvt.f32.u32 s2, s2
28; CHECK-MVE-NEXT:    vcvt.f32.u32 s1, s1
29; CHECK-MVE-NEXT:    vcvt.f32.u32 s0, s0
30; CHECK-MVE-NEXT:    bx lr
31;
32; CHECK-MVEFP-LABEL: foo_float_uint32:
33; CHECK-MVEFP:       @ %bb.0: @ %entry
34; CHECK-MVEFP-NEXT:    vcvt.f32.u32 q0, q0
35; CHECK-MVEFP-NEXT:    bx lr
36entry:
37  %out = uitofp <4 x i32> %src to <4 x float>
38  ret <4 x float> %out
39}
40
; Float to signed integer: fptosi <4 x float> to <4 x i32>.
; Expected code for the +mve,+fullfp16 run converts each element with a
; scalar vcvt.s32.f32, moves the four results through r0/r1, and rebuilds q0
; two lanes at a time (vmov q0[2], q0[0], ... then vmov q0[3], q0[1], ...).
; For the +mve.fp run it is a single vector vcvt.s32.f32 q0, q0.
41define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
42; CHECK-MVE-LABEL: foo_int32_float:
43; CHECK-MVE:       @ %bb.0: @ %entry
44; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
45; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
46; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s3
47; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s1
48; CHECK-MVE-NEXT:    vmov r0, s2
49; CHECK-MVE-NEXT:    vmov r1, s0
50; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r1, r0
51; CHECK-MVE-NEXT:    vmov r0, s4
52; CHECK-MVE-NEXT:    vmov r1, s6
53; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r1, r0
54; CHECK-MVE-NEXT:    bx lr
55;
56; CHECK-MVEFP-LABEL: foo_int32_float:
57; CHECK-MVEFP:       @ %bb.0: @ %entry
58; CHECK-MVEFP-NEXT:    vcvt.s32.f32 q0, q0
59; CHECK-MVEFP-NEXT:    bx lr
60entry:
61  %out = fptosi <4 x float> %src to <4 x i32>
62  ret <4 x i32> %out
63}
64
; Float to unsigned integer: fptoui <4 x float> to <4 x i32>.
; Expected code for the +mve,+fullfp16 run converts each element with a
; scalar vcvt.u32.f32, moves the four results through r0/r1, and rebuilds q0
; two lanes at a time (vmov q0[2], q0[0], ... then vmov q0[3], q0[1], ...).
; For the +mve.fp run it is a single vector vcvt.u32.f32 q0, q0.
65define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
66; CHECK-MVE-LABEL: foo_uint32_float:
67; CHECK-MVE:       @ %bb.0: @ %entry
68; CHECK-MVE-NEXT:    vcvt.u32.f32 s2, s2
69; CHECK-MVE-NEXT:    vcvt.u32.f32 s0, s0
70; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s3
71; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s1
72; CHECK-MVE-NEXT:    vmov r0, s2
73; CHECK-MVE-NEXT:    vmov r1, s0
74; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r1, r0
75; CHECK-MVE-NEXT:    vmov r0, s4
76; CHECK-MVE-NEXT:    vmov r1, s6
77; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r1, r0
78; CHECK-MVE-NEXT:    bx lr
79;
80; CHECK-MVEFP-LABEL: foo_uint32_float:
81; CHECK-MVEFP:       @ %bb.0: @ %entry
82; CHECK-MVEFP-NEXT:    vcvt.u32.f32 q0, q0
83; CHECK-MVEFP-NEXT:    bx lr
84entry:
85  %out = fptoui <4 x float> %src to <4 x i32>
86  ret <4 x i32> %out
87}
88
; Signed 16-bit integer to half: sitofp <8 x i16> to <8 x half>.
; Expected code for the +mve,+fullfp16 run copies the input to q1, reads each
; lane with vmov.s16 into r0, converts it with a scalar vcvt.f16.s32, and
; packs pairs of results into s0-s3 with vins.f16.
; For the +mve.fp run it is a single vector vcvt.f16.s16 q0, q0.
89define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
90; CHECK-MVE-LABEL: foo_half_int16:
91; CHECK-MVE:       @ %bb.0: @ %entry
92; CHECK-MVE-NEXT:    vmov q1, q0
93; CHECK-MVE-NEXT:    vmov.s16 r0, q0[0]
94; CHECK-MVE-NEXT:    vmov s0, r0
95; CHECK-MVE-NEXT:    vmov.s16 r0, q1[1]
96; CHECK-MVE-NEXT:    vmov s2, r0
97; CHECK-MVE-NEXT:    vcvt.f16.s32 s0, s0
98; CHECK-MVE-NEXT:    vcvt.f16.s32 s2, s2
99; CHECK-MVE-NEXT:    vmov.s16 r0, q1[3]
100; CHECK-MVE-NEXT:    vins.f16 s0, s2
101; CHECK-MVE-NEXT:    vmov s2, r0
102; CHECK-MVE-NEXT:    vmov.s16 r0, q1[2]
103; CHECK-MVE-NEXT:    vcvt.f16.s32 s2, s2
104; CHECK-MVE-NEXT:    vmov s8, r0
105; CHECK-MVE-NEXT:    vmov.s16 r0, q1[4]
106; CHECK-MVE-NEXT:    vcvt.f16.s32 s1, s8
107; CHECK-MVE-NEXT:    vins.f16 s1, s2
108; CHECK-MVE-NEXT:    vmov s2, r0
109; CHECK-MVE-NEXT:    vmov.s16 r0, q1[5]
110; CHECK-MVE-NEXT:    vcvt.f16.s32 s2, s2
111; CHECK-MVE-NEXT:    vmov s8, r0
112; CHECK-MVE-NEXT:    vmov.s16 r0, q1[7]
113; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
114; CHECK-MVE-NEXT:    vins.f16 s2, s8
115; CHECK-MVE-NEXT:    vmov s8, r0
116; CHECK-MVE-NEXT:    vmov.s16 r0, q1[6]
117; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
118; CHECK-MVE-NEXT:    vmov s4, r0
119; CHECK-MVE-NEXT:    vcvt.f16.s32 s3, s4
120; CHECK-MVE-NEXT:    vins.f16 s3, s8
121; CHECK-MVE-NEXT:    bx lr
122;
123; CHECK-MVEFP-LABEL: foo_half_int16:
124; CHECK-MVEFP:       @ %bb.0: @ %entry
125; CHECK-MVEFP-NEXT:    vcvt.f16.s16 q0, q0
126; CHECK-MVEFP-NEXT:    bx lr
127entry:
128  %out = sitofp <8 x i16> %src to <8 x half>
129  ret <8 x half> %out
130}
131
; Unsigned 16-bit integer to half: uitofp <8 x i16> to <8 x half>.
; Expected code for the +mve,+fullfp16 run copies the input to q1, reads each
; lane with vmov.u16 into r0, converts it with a scalar vcvt.f16.u32, and
; packs pairs of results into s0-s3 with vins.f16.
; For the +mve.fp run it is a single vector vcvt.f16.u16 q0, q0.
132define arm_aapcs_vfpcc <8 x half> @foo_half_uint16(<8 x i16> %src) {
133; CHECK-MVE-LABEL: foo_half_uint16:
134; CHECK-MVE:       @ %bb.0: @ %entry
135; CHECK-MVE-NEXT:    vmov q1, q0
136; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
137; CHECK-MVE-NEXT:    vmov s0, r0
138; CHECK-MVE-NEXT:    vmov.u16 r0, q1[1]
139; CHECK-MVE-NEXT:    vmov s2, r0
140; CHECK-MVE-NEXT:    vcvt.f16.u32 s0, s0
141; CHECK-MVE-NEXT:    vcvt.f16.u32 s2, s2
142; CHECK-MVE-NEXT:    vmov.u16 r0, q1[3]
143; CHECK-MVE-NEXT:    vins.f16 s0, s2
144; CHECK-MVE-NEXT:    vmov s2, r0
145; CHECK-MVE-NEXT:    vmov.u16 r0, q1[2]
146; CHECK-MVE-NEXT:    vcvt.f16.u32 s2, s2
147; CHECK-MVE-NEXT:    vmov s8, r0
148; CHECK-MVE-NEXT:    vmov.u16 r0, q1[4]
149; CHECK-MVE-NEXT:    vcvt.f16.u32 s1, s8
150; CHECK-MVE-NEXT:    vins.f16 s1, s2
151; CHECK-MVE-NEXT:    vmov s2, r0
152; CHECK-MVE-NEXT:    vmov.u16 r0, q1[5]
153; CHECK-MVE-NEXT:    vcvt.f16.u32 s2, s2
154; CHECK-MVE-NEXT:    vmov s8, r0
155; CHECK-MVE-NEXT:    vmov.u16 r0, q1[7]
156; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
157; CHECK-MVE-NEXT:    vins.f16 s2, s8
158; CHECK-MVE-NEXT:    vmov s8, r0
159; CHECK-MVE-NEXT:    vmov.u16 r0, q1[6]
160; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
161; CHECK-MVE-NEXT:    vmov s4, r0
162; CHECK-MVE-NEXT:    vcvt.f16.u32 s3, s4
163; CHECK-MVE-NEXT:    vins.f16 s3, s8
164; CHECK-MVE-NEXT:    bx lr
165;
166; CHECK-MVEFP-LABEL: foo_half_uint16:
167; CHECK-MVEFP:       @ %bb.0: @ %entry
168; CHECK-MVEFP-NEXT:    vcvt.f16.u16 q0, q0
169; CHECK-MVEFP-NEXT:    bx lr
170entry:
171  %out = uitofp <8 x i16> %src to <8 x half>
172  ret <8 x half> %out
173}
174
; Half to signed 16-bit integer: fptosi <8 x half> to <8 x i16>.
; Expected code for the +mve,+fullfp16 run splits the upper halves of s0-s3
; out with vmovx.f16, converts all eight values with scalar vcvt.s32.f16, and
; writes each result into its lane of q0 via r0 (vmov.16 q0[n], r0).
; For the +mve.fp run it is a single vector vcvt.s16.f16 q0, q0.
175define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) {
176; CHECK-MVE-LABEL: foo_int16_half:
177; CHECK-MVE:       @ %bb.0: @ %entry
178; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
179; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
180; CHECK-MVE-NEXT:    vmovx.f16 s2, s0
181; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
182; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s2
183; CHECK-MVE-NEXT:    vmov r0, s0
184; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
185; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
186; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
187; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
188; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
189; CHECK-MVE-NEXT:    vmov r0, s14
190; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
191; CHECK-MVE-NEXT:    vmov r0, s5
192; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
193; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
194; CHECK-MVE-NEXT:    vmov r0, s10
195; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
196; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
197; CHECK-MVE-NEXT:    vmov r0, s12
198; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
199; CHECK-MVE-NEXT:    vmov r0, s6
200; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
201; CHECK-MVE-NEXT:    vmov r0, s8
202; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
203; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
204; CHECK-MVE-NEXT:    vmov r0, s4
205; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
206; CHECK-MVE-NEXT:    bx lr
207;
208; CHECK-MVEFP-LABEL: foo_int16_half:
209; CHECK-MVEFP:       @ %bb.0: @ %entry
210; CHECK-MVEFP-NEXT:    vcvt.s16.f16 q0, q0
211; CHECK-MVEFP-NEXT:    bx lr
212entry:
213  %out = fptosi <8 x half> %src to <8 x i16>
214  ret <8 x i16> %out
215}
216
; Half to unsigned 16-bit integer: fptoui <8 x half> to <8 x i16>.
; Expected code for the +mve,+fullfp16 run splits the upper halves of s0-s3
; out with vmovx.f16, converts all eight values with scalar vcvt.s32.f16, and
; writes each result into its lane of q0 via r0 (vmov.16 q0[n], r0).
; For the +mve.fp run it is a single vector vcvt.u16.f16 q0, q0.
; NOTE(review): the +mve,+fullfp16 expectations use the signed vcvt.s32.f16
; even though the IR is fptoui, and the sequence is the same as the one
; expected for the fptosi <8 x half> test. Presumably this is deliberate
; (the assertions are autogenerated from llc output) - confirm before editing.
217define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) {
218; CHECK-MVE-LABEL: foo_uint16_half:
219; CHECK-MVE:       @ %bb.0: @ %entry
220; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
221; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
222; CHECK-MVE-NEXT:    vmovx.f16 s2, s0
223; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
224; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s2
225; CHECK-MVE-NEXT:    vmov r0, s0
226; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
227; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
228; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
229; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
230; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
231; CHECK-MVE-NEXT:    vmov r0, s14
232; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
233; CHECK-MVE-NEXT:    vmov r0, s5
234; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
235; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
236; CHECK-MVE-NEXT:    vmov r0, s10
237; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
238; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
239; CHECK-MVE-NEXT:    vmov r0, s12
240; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
241; CHECK-MVE-NEXT:    vmov r0, s6
242; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
243; CHECK-MVE-NEXT:    vmov r0, s8
244; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
245; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
246; CHECK-MVE-NEXT:    vmov r0, s4
247; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
248; CHECK-MVE-NEXT:    bx lr
249;
250; CHECK-MVEFP-LABEL: foo_uint16_half:
251; CHECK-MVEFP:       @ %bb.0: @ %entry
252; CHECK-MVEFP-NEXT:    vcvt.u16.f16 q0, q0
253; CHECK-MVEFP-NEXT:    bx lr
254entry:
255  %out = fptoui <8 x half> %src to <8 x i16>
256  ret <8 x i16> %out
257}
258
; Signed 64-bit integer to double: sitofp <2 x i64> to <2 x double>.
; Despite "float" in the name, the result element type is double.
; Both RUN configurations share one set of expectations: the input is kept in
; q4 (d8/d9 are saved and restored around the body), each 64-bit element is
; passed in r0/r1 to the __aeabi_l2d library routine, and the two results are
; written back into d9 and d8 before q4 is copied to q0.
259define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
260; CHECK-LABEL: foo_float_int64:
261; CHECK:       @ %bb.0: @ %entry
262; CHECK-NEXT:    .save {r7, lr}
263; CHECK-NEXT:    push {r7, lr}
264; CHECK-NEXT:    .vsave {d8, d9}
265; CHECK-NEXT:    vpush {d8, d9}
266; CHECK-NEXT:    vmov q4, q0
267; CHECK-NEXT:    vmov r0, r1, d9
268; CHECK-NEXT:    bl __aeabi_l2d
269; CHECK-NEXT:    vmov r2, r3, d8
270; CHECK-NEXT:    vmov d9, r0, r1
271; CHECK-NEXT:    mov r0, r2
272; CHECK-NEXT:    mov r1, r3
273; CHECK-NEXT:    bl __aeabi_l2d
274; CHECK-NEXT:    vmov d8, r0, r1
275; CHECK-NEXT:    vmov q0, q4
276; CHECK-NEXT:    vpop {d8, d9}
277; CHECK-NEXT:    pop {r7, pc}
278entry:
279  %out = sitofp <2 x i64> %src to <2 x double>
280  ret <2 x double> %out
281}
282
; Unsigned 64-bit integer to double: uitofp <2 x i64> to <2 x double>.
; Despite "float" in the name, the result element type is double.
; Both RUN configurations share one set of expectations: the input is kept in
; q4 (d8/d9 are saved and restored around the body), each 64-bit element is
; passed in r0/r1 to the __aeabi_ul2d library routine, and the two results
; are written back into d9 and d8 before q4 is copied to q0.
283define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
284; CHECK-LABEL: foo_float_uint64:
285; CHECK:       @ %bb.0: @ %entry
286; CHECK-NEXT:    .save {r7, lr}
287; CHECK-NEXT:    push {r7, lr}
288; CHECK-NEXT:    .vsave {d8, d9}
289; CHECK-NEXT:    vpush {d8, d9}
290; CHECK-NEXT:    vmov q4, q0
291; CHECK-NEXT:    vmov r0, r1, d9
292; CHECK-NEXT:    bl __aeabi_ul2d
293; CHECK-NEXT:    vmov r2, r3, d8
294; CHECK-NEXT:    vmov d9, r0, r1
295; CHECK-NEXT:    mov r0, r2
296; CHECK-NEXT:    mov r1, r3
297; CHECK-NEXT:    bl __aeabi_ul2d
298; CHECK-NEXT:    vmov d8, r0, r1
299; CHECK-NEXT:    vmov q0, q4
300; CHECK-NEXT:    vpop {d8, d9}
301; CHECK-NEXT:    pop {r7, pc}
302entry:
303  %out = uitofp <2 x i64> %src to <2 x double>
304  ret <2 x double> %out
305}
306
; Double to signed 64-bit integer: fptosi <2 x double> to <2 x i64>.
; Despite "float" in the name, the source element type is double.
; Both RUN configurations share one set of expectations: the input is kept in
; q4, each double is passed in r0/r1 to the __aeabi_d2lz library routine, the
; first result is held in r4/r5 across the second call, and q0 is assembled
; with vmov q0[2], q0[0], r0, r4 and vmov q0[3], q0[1], r1, r5.
307define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
308; CHECK-LABEL: foo_int64_float:
309; CHECK:       @ %bb.0: @ %entry
310; CHECK-NEXT:    .save {r4, r5, r7, lr}
311; CHECK-NEXT:    push {r4, r5, r7, lr}
312; CHECK-NEXT:    .vsave {d8, d9}
313; CHECK-NEXT:    vpush {d8, d9}
314; CHECK-NEXT:    vmov q4, q0
315; CHECK-NEXT:    vmov r0, r1, d9
316; CHECK-NEXT:    bl __aeabi_d2lz
317; CHECK-NEXT:    mov r4, r0
318; CHECK-NEXT:    mov r5, r1
319; CHECK-NEXT:    vmov r0, r1, d8
320; CHECK-NEXT:    bl __aeabi_d2lz
321; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
322; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
323; CHECK-NEXT:    vpop {d8, d9}
324; CHECK-NEXT:    pop {r4, r5, r7, pc}
325entry:
326  %out = fptosi <2 x double> %src to <2 x i64>
327  ret <2 x i64> %out
328}
329
; Double to unsigned 64-bit integer: fptoui <2 x double> to <2 x i64>.
; Despite "float" in the name, the source element type is double.
; Both RUN configurations share one set of expectations: the input is kept in
; q4, each double is passed in r0/r1 to the __aeabi_d2ulz library routine, the
; first result is held in r4/r5 across the second call, and q0 is assembled
; with vmov q0[2], q0[0], r0, r4 and vmov q0[3], q0[1], r1, r5.
330define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
331; CHECK-LABEL: foo_uint64_float:
332; CHECK:       @ %bb.0: @ %entry
333; CHECK-NEXT:    .save {r4, r5, r7, lr}
334; CHECK-NEXT:    push {r4, r5, r7, lr}
335; CHECK-NEXT:    .vsave {d8, d9}
336; CHECK-NEXT:    vpush {d8, d9}
337; CHECK-NEXT:    vmov q4, q0
338; CHECK-NEXT:    vmov r0, r1, d9
339; CHECK-NEXT:    bl __aeabi_d2ulz
340; CHECK-NEXT:    mov r4, r0
341; CHECK-NEXT:    mov r5, r1
342; CHECK-NEXT:    vmov r0, r1, d8
343; CHECK-NEXT:    bl __aeabi_d2ulz
344; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
345; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
346; CHECK-NEXT:    vpop {d8, d9}
347; CHECK-NEXT:    pop {r4, r5, r7, pc}
348entry:
349  %out = fptoui <2 x double> %src to <2 x i64>
350  ret <2 x i64> %out
351}
352
; Interleave-then-truncate: the shufflevector mask <0,4,1,5,2,6,3,7> puts
; %src1 elements in the even result lanes and %src2 elements in the odd
; ones, then fptrunc narrows <8 x float> to <8 x half>.
; Expected code for the +mve,+fullfp16 run is four scalar vcvtb.f16.f32
; (sources s0-s3) followed by four scalar vcvtt.f16.f32 (sources s4-s7), all
; writing s0-s3. For the +mve.fp run it is the vector pair
; vcvtb.f16.f32 q0, q0 then vcvtt.f16.f32 q0, q1.
353define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc1(<4 x float> %src1, <4 x float> %src2) {
354; CHECK-MVE-LABEL: vmovn32_trunc1:
355; CHECK-MVE:       @ %bb.0: @ %entry
356; CHECK-MVE-NEXT:    vcvtb.f16.f32 s0, s0
357; CHECK-MVE-NEXT:    vcvtb.f16.f32 s1, s1
358; CHECK-MVE-NEXT:    vcvtb.f16.f32 s2, s2
359; CHECK-MVE-NEXT:    vcvtb.f16.f32 s3, s3
360; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s4
361; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s5
362; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s6
363; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s7
364; CHECK-MVE-NEXT:    bx lr
365;
366; CHECK-MVEFP-LABEL: vmovn32_trunc1:
367; CHECK-MVEFP:       @ %bb.0: @ %entry
368; CHECK-MVEFP-NEXT:    vcvtb.f16.f32 q0, q0
369; CHECK-MVEFP-NEXT:    vcvtt.f16.f32 q0, q1
370; CHECK-MVEFP-NEXT:    bx lr
371entry:
372  %strided.vec = shufflevector <4 x float> %src1, <4 x float> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
373  %out = fptrunc <8 x float> %strided.vec to <8 x half>
374  ret <8 x half> %out
375}
376
; Interleave-then-truncate with the operands swapped: the shufflevector mask
; <4,0,5,1,6,2,7,3> puts %src2 elements in the even result lanes and %src1
; elements in the odd ones, then fptrunc narrows <8 x float> to <8 x half>.
; Expected code for the +mve,+fullfp16 run first copies q0 to q2, then issues
; four scalar vcvtb.f16.f32 (sources s4-s7) and four scalar vcvtt.f16.f32
; (sources s8-s11), all writing s0-s3. For the +mve.fp run it is
; vcvtb.f16.f32 q1, q1 then vcvtt.f16.f32 q1, q0, with the result moved from
; q1 to q0.
377define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc2(<4 x float> %src1, <4 x float> %src2) {
378; CHECK-MVE-LABEL: vmovn32_trunc2:
379; CHECK-MVE:       @ %bb.0: @ %entry
380; CHECK-MVE-NEXT:    vmov q2, q0
381; CHECK-MVE-NEXT:    vcvtb.f16.f32 s0, s4
382; CHECK-MVE-NEXT:    vcvtb.f16.f32 s1, s5
383; CHECK-MVE-NEXT:    vcvtb.f16.f32 s2, s6
384; CHECK-MVE-NEXT:    vcvtb.f16.f32 s3, s7
385; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s8
386; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s9
387; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s10
388; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s11
389; CHECK-MVE-NEXT:    bx lr
390;
391; CHECK-MVEFP-LABEL: vmovn32_trunc2:
392; CHECK-MVEFP:       @ %bb.0: @ %entry
393; CHECK-MVEFP-NEXT:    vcvtb.f16.f32 q1, q1
394; CHECK-MVEFP-NEXT:    vcvtt.f16.f32 q1, q0
395; CHECK-MVEFP-NEXT:    vmov q0, q1
396; CHECK-MVEFP-NEXT:    bx lr
397entry:
398  %strided.vec = shufflevector <4 x float> %src1, <4 x float> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
399  %out = fptrunc <8 x float> %strided.vec to <8 x half>
400  ret <8 x half> %out
401}
402