; xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vmovn.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECKBE

; Interleaves the four i32 lanes of %src1 and %src2 (mask 0,4,1,5,2,6,3,7) and
; truncates the resulting <8 x i32> to <8 x i16>. The little-endian runs expect
; this to fold into a single "vmovnt.i32 q0, q1"; the big-endian run expects the
; same instruction wrapped in vrev64 lane reversals.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Same interleave-then-truncate shape as vmovn32_trunc1, but with the sources
; swapped in the mask (4,0,5,1,6,2,7,3) so %src2 supplies the even result lanes.
; The little-endian runs expect "vmovnt.i32 q1, q0" followed by a register copy
; back into q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmovnt.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Single-source variant: every i32 lane of %src1 is duplicated (mask
; 0,0,1,1,2,2,3,3, second shuffle operand undef) and the result is truncated to
; <8 x i16>. The little-endian runs expect "vmovnt.i32 q0, q0".
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc3(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_trunc3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Variant of vmovn32_trunc1 where the interleave is written on <8 x i16>
; bitcasts of the sources (i16 lanes moved in pairs: 0,1,8,9,2,3,10,11,...),
; bitcast back to <8 x i32> and then truncated to <8 x i16>.
; The recorded expectations do not use vmovnt here: all three configurations
; build the two halves with vmov.f32, narrow them with vstrh.32 stores into a
; 16-byte stack slot and reload the result.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1_viabitcast(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-MVE-LABEL: vmovn32_trunc1_viabitcast:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    .pad #16
; CHECK-MVE-NEXT:    sub sp, #16
; CHECK-MVE-NEXT:    vmov.f32 s8, s2
; CHECK-MVE-NEXT:    mov r0, sp
; CHECK-MVE-NEXT:    vmov.f32 s9, s6
; CHECK-MVE-NEXT:    vmov.f32 s10, s3
; CHECK-MVE-NEXT:    vmov.f32 s11, s7
; CHECK-MVE-NEXT:    vstrh.32 q2, [r0, #8]
; CHECK-MVE-NEXT:    vmov.f32 s8, s0
; CHECK-MVE-NEXT:    vmov.f32 s9, s4
; CHECK-MVE-NEXT:    vmov.f32 s10, s1
; CHECK-MVE-NEXT:    vmov.f32 s11, s5
; CHECK-MVE-NEXT:    vstrh.32 q2, [r0]
; CHECK-MVE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-MVE-NEXT:    add sp, #16
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn32_trunc1_viabitcast:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    .pad #16
; CHECK-MVEFP-NEXT:    sub sp, #16
; CHECK-MVEFP-NEXT:    mov r0, sp
; CHECK-MVEFP-NEXT:    vmov.f32 s8, s2
; CHECK-MVEFP-NEXT:    vmov.f32 s9, s6
; CHECK-MVEFP-NEXT:    vmov.f32 s10, s3
; CHECK-MVEFP-NEXT:    vmov.f32 s11, s7
; CHECK-MVEFP-NEXT:    vstrh.32 q2, [r0, #8]
; CHECK-MVEFP-NEXT:    vmov.f32 s8, s0
; CHECK-MVEFP-NEXT:    vmov.f32 s9, s4
; CHECK-MVEFP-NEXT:    vmov.f32 s10, s1
; CHECK-MVEFP-NEXT:    vmov.f32 s11, s5
; CHECK-MVEFP-NEXT:    vstrh.32 q2, [r0]
; CHECK-MVEFP-NEXT:    vldrw.u32 q0, [r0]
; CHECK-MVEFP-NEXT:    add sp, #16
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1_viabitcast:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    .pad #16
; CHECKBE-NEXT:    sub sp, #16
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    mov r0, sp
; CHECKBE-NEXT:    vmov.f32 s0, s6
; CHECKBE-NEXT:    vmov.f32 s1, s10
; CHECKBE-NEXT:    vmov.f32 s2, s7
; CHECKBE-NEXT:    vmov.f32 s3, s11
; CHECKBE-NEXT:    vstrh.32 q0, [r0, #8]
; CHECKBE-NEXT:    vmov.f32 s0, s4
; CHECKBE-NEXT:    vmov.f32 s1, s8
; CHECKBE-NEXT:    vmov.f32 s2, s5
; CHECKBE-NEXT:    vmov.f32 s3, s9
; CHECKBE-NEXT:    vstrh.32 q0, [r0]
; CHECKBE-NEXT:    vldrb.u8 q1, [r0]
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    add sp, #16
; CHECKBE-NEXT:    bx lr
entry:
  %b1 = bitcast <4 x i32> %src1 to <8 x i16>
  %b2 = bitcast <4 x i32> %src2 to <8 x i16>
  %s = shufflevector <8 x i16> %b1, <8 x i16> %b2, <16 x i32> <i32 0, i32 1, i32 8, i32 9, i32 2, i32 3, i32 10, i32 11, i32 4, i32 5, i32 12, i32 13, i32 6, i32 7, i32 14, i32 15>
  %b3 = bitcast <16 x i16> %s to <8 x i32>
  %out = trunc <8 x i32> %b3 to <8 x i16>
  ret <8 x i16> %out
}


; Interleaves the eight i16 lanes of %src1 and %src2 (mask 0,8,1,9,...,7,15) and
; truncates the resulting <16 x i16> to <16 x i8>. The little-endian runs expect
; a single "vmovnt.i16 q0, q1".
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; Same as vmovn16_trunc1 with the sources swapped in the mask
; (8,0,9,1,...,15,7), so %src2 supplies the even result lanes. The little-endian
; runs expect "vmovnt.i16 q1, q0" followed by a copy back into q0.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnt.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; Single-source variant: every i16 lane of %src1 is duplicated (mask
; 0,0,1,1,...,7,7, second shuffle operand undef) and the result is truncated to
; <16 x i8>. The little-endian runs expect "vmovnt.i16 q0, q0".
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc3(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_trunc3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}



; <2 x i64> shuffle with mask <0, 2>: lane 0 of %src1 followed by lane 0 of
; %src2. All configurations expect two vmov.f32 copies (s4,s5 into s2,s3)
; rather than a vmovn instruction.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s2, s4
; CHECKBE-NEXT:    vmov.f32 s3, s5
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <2, 0>: lane 0 of %src2 followed by lane 0 of
; %src1. Expected lowering copies s0,s1 into s6,s7 and then moves q1 to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s6, s0
; CHECKBE-NEXT:    vmov.f32 s7, s1
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <0, 3>: lane 0 of %src1 followed by lane 1 of
; %src2. Expected lowering is two vmov.f32 copies (s6,s7 into s2,s3).
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s2, s6
; CHECKBE-NEXT:    vmov.f32 s3, s7
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <3, 0>: lane 1 of %src2 followed by lane 0 of
; %src1. Expected lowering is four vmov.f32 copies building the result in q1,
; then a move of q1 to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s6
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s7
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s4, s6
; CHECKBE-NEXT:    vmov.f32 s6, s0
; CHECKBE-NEXT:    vmov.f32 s5, s7
; CHECKBE-NEXT:    vmov.f32 s7, s1
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <1, 2>: lane 1 of %src1 followed by lane 0 of
; %src2. Expected lowering is four vmov.f32 copies performed in place in q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s0, s2
; CHECKBE-NEXT:    vmov.f32 s2, s4
; CHECKBE-NEXT:    vmov.f32 s1, s3
; CHECKBE-NEXT:    vmov.f32 s3, s5
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <2, 1>: lane 0 of %src2 followed by lane 1 of
; %src1. Expected lowering copies s2,s3 into s6,s7 and then moves q1 to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s6, s2
; CHECKBE-NEXT:    vmov.f32 s7, s3
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %out
}



; <4 x i32> shuffle with mask <0, 4, 2, 6>: even lanes of %src1 in the even
; result lanes, even lanes of %src2 in the odd result lanes. Expected lowering
; is two vmov.f32 lane copies (plus vrev64.32 wrapping on big-endian).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <4, 0, 6, 2>: even lanes of %src2 in the even
; result lanes, even lanes of %src1 in the odd result lanes. Expected
; little-endian lowering is two vmov.f32 lane copies into q1 and a move to q0.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmov.f32 s13, s8
; CHECKBE-NEXT:    vmov.f32 s15, s10
; CHECKBE-NEXT:    vrev64.32 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <0, 5, 2, 7>: even lanes of %src1 kept in place,
; odd lanes taken from the odd lanes of %src2. Expected little-endian lowering
; is two vmov.f32 lane copies into q0.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s5
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s9
; CHECKBE-NEXT:    vmov.f32 s7, s11
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <5, 0, 7, 2>: odd lanes of %src2 in the even
; result lanes, even lanes of %src1 in the odd result lanes. Expected lowering
; is four vmov.f32 lane copies building the result in q1.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s5
; CHECK-NEXT:    vmov.f32 s6, s7
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    vmov.f32 s4, s1
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s6, s3
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <1, 4, 3, 6>: odd lanes of %src1 in the even
; result lanes, even lanes of %src2 in the odd result lanes. Expected
; little-endian lowering is four vmov.f32 lane copies performed in q0.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s4, s5
; CHECKBE-NEXT:    vmov.f32 s6, s7
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <4, 1, 6, 3>: even lanes of %src2 kept in the even
; result lanes, odd lanes taken from the odd lanes of %src1. Expected
; little-endian lowering is two vmov.f32 lane copies into q1 and a move to q0.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmov.f32 s13, s9
; CHECKBE-NEXT:    vmov.f32 s15, s11
; CHECKBE-NEXT:    vrev64.32 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %out
}

; Single-source <4 x i32> shuffle with mask <0, 0, 2, 2> (second operand
; undef): each even lane of %src1 is duplicated into the following odd lane.
; Expected little-endian lowering is two vmov.f32 lane copies within q0.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_single_t(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s0
; CHECK-NEXT:    vmov.f32 s3, s2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s4
; CHECKBE-NEXT:    vmov.f32 s7, s6
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x i32> %out
}




; <8 x i16> shuffle with mask <0, 8, 2, 10, 4, 12, 6, 14>: even lanes of %src1
; stay in the even result lanes, even lanes of %src2 fill the odd result lanes.
; The little-endian runs expect a single "vmovnt.i32 q0, q1".
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %out
}

; <8 x i16> shuffle with mask <8, 0, 10, 2, 12, 4, 14, 6>: the vmovn16_t1
; pattern with the sources swapped. The little-endian runs expect
; "vmovnt.i32 q1, q0" followed by a move of q1 to q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnt.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  ret <8 x i16> %out
}

; <8 x i16> shuffle with mask <0, 9, 2, 11, 4, 13, 6, 15>: even lanes of %src1
; in the even result lanes, odd lanes of %src2 kept in the odd result lanes.
; The little-endian runs expect "vmovnb.i32 q1, q0" followed by a move of q1 to
; q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnb.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; <8 x i16> shuffle with mask <9, 0, 11, 2, 13, 4, 15, 6>: odd lanes of %src2
; in the even result lanes, even lanes of %src1 in the odd result lanes.
; No vmovn is expected for this mask. Without fullfp16 the expected output
; moves each lane through r0 (vmov.u16 / vmov.16); with fullfp16, and in the
; big-endian run, it uses vmovx.f16 / vins.f16 pairs instead.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-MVE-LABEL: vmovn16_b2:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[1]
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[0]
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[3]
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[2]
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[5]
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[4]
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[7]
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[6]
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn16_b2:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmovx.f16 s4, s4
; CHECK-MVEFP-NEXT:    vmovx.f16 s5, s5
; CHECK-MVEFP-NEXT:    vmovx.f16 s6, s6
; CHECK-MVEFP-NEXT:    vmovx.f16 s7, s7
; CHECK-MVEFP-NEXT:    vins.f16 s4, s0
; CHECK-MVEFP-NEXT:    vins.f16 s5, s1
; CHECK-MVEFP-NEXT:    vins.f16 s6, s2
; CHECK-MVEFP-NEXT:    vins.f16 s7, s3
; CHECK-MVEFP-NEXT:    vmov q0, q1
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    vmovx.f16 s4, s0
; CHECKBE-NEXT:    vmovx.f16 s5, s1
; CHECKBE-NEXT:    vmovx.f16 s6, s2
; CHECKBE-NEXT:    vmovx.f16 s7, s3
; CHECKBE-NEXT:    vins.f16 s4, s8
; CHECKBE-NEXT:    vins.f16 s5, s9
; CHECKBE-NEXT:    vins.f16 s6, s10
; CHECKBE-NEXT:    vins.f16 s7, s11
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  ret <8 x i16> %out
}

; <8 x i16> shuffle with mask <1, 8, 3, 10, 5, 12, 7, 14>: odd lanes of %src1
; in the even result lanes, even lanes of %src2 in the odd result lanes.
; No vmovn is expected for this mask. Without fullfp16 the expected output
; moves each lane through r0 (vmov.u16 / vmov.16); with fullfp16, and in the
; big-endian run, it uses vmovx.f16 / vins.f16 pairs instead.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-MVE-LABEL: vmovn16_b3:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[1]
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[0]
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[3]
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[2]
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[5]
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[4]
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[7]
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[6]
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn16_b3:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmovx.f16 s0, s0
; CHECK-MVEFP-NEXT:    vmovx.f16 s1, s1
; CHECK-MVEFP-NEXT:    vmovx.f16 s2, s2
; CHECK-MVEFP-NEXT:    vmovx.f16 s3, s3
; CHECK-MVEFP-NEXT:    vins.f16 s0, s4
; CHECK-MVEFP-NEXT:    vins.f16 s1, s5
; CHECK-MVEFP-NEXT:    vins.f16 s2, s6
; CHECK-MVEFP-NEXT:    vins.f16 s3, s7
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovx.f16 s4, s4
; CHECKBE-NEXT:    vmovx.f16 s5, s5
; CHECKBE-NEXT:    vmovx.f16 s6, s6
; CHECKBE-NEXT:    vmovx.f16 s7, s7
; CHECKBE-NEXT:    vins.f16 s4, s8
; CHECKBE-NEXT:    vins.f16 s5, s9
; CHECKBE-NEXT:    vins.f16 s6, s10
; CHECKBE-NEXT:    vins.f16 s7, s11
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  ret <8 x i16> %out
}

; <8 x i16> shuffle with mask <8, 1, 10, 3, 12, 5, 14, 7>: even lanes of %src2
; in the even result lanes, odd lanes of %src1 kept in the odd result lanes.
; The little-endian runs expect a single "vmovnb.i32 q0, q1".
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnb.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i16> %out
}

; Single-source <8 x i16> shuffle with mask <0, 0, 2, 2, 4, 4, 6, 6> (second
; operand undef): each even lane of %src1 is duplicated into the following odd
; lane. The little-endian runs expect "vmovnt.i32 q0, q0".
define arm_aapcs_vfpcc <8 x i16> @vmovn16_single_t(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i16> %out
}


; <16 x i8> shuffle with mask <0, 16, 2, 18, ..., 14, 30>: even lanes of %src1
; stay in the even result lanes, even lanes of %src2 fill the odd result lanes.
; The little-endian runs expect a single "vmovnt.i16 q0, q1".
; NOTE(review): despite the "_b1" suffix this is the same mask shape as
; vmovn16_t1 and selects vmovnt; the b/t suffixes in the vmovn8_* tests appear
; to be swapped relative to the vmovn16_* tests. The name is kept as is.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %out
}

; <16 x i8> shuffle with mask <16, 0, 18, 2, ..., 30, 14>: even lanes of %src2
; in the even result lanes, even lanes of %src1 in the odd result lanes.
; The little-endian runs expect "vmovnt.i16 q1, q0" followed by a move of q1 to
; q0.
; NOTE(review): despite the "_b2" suffix this is the same mask shape as
; vmovn16_t2 and selects vmovnt. The name is kept as is.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q0
; CHECKBE-NEXT:    vrev64.8 q3, q1
; CHECKBE-NEXT:    vmovnt.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  ret <16 x i8> %out
}

; <16 x i8> shuffle with mask <0, 17, 2, 19, ..., 14, 31>: even lanes of %src1
; in the even result lanes, odd lanes of %src2 kept in the odd result lanes.
; The little-endian runs expect "vmovnb.i16 q1, q0" followed by a move of q1 to
; q0.
; NOTE(review): despite the "_t1" suffix this is the same mask shape as
; vmovn16_b1 and selects vmovnb. The name is kept as is.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q0
; CHECKBE-NEXT:    vrev64.8 q3, q1
; CHECKBE-NEXT:    vmovnb.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %out
}

; <16 x i8> shuffle with mask <17, 0, 19, 2, ..., 31, 14>: odd lanes of %src2
; in the even result lanes, even lanes of %src1 in the odd result lanes.
; No vmovn is expected for this mask: every configuration moves the sixteen
; bytes one at a time through r0 (vmov.u8 / vmov.8).
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q2[0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q2[2]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q2[4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q2[6]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q2[8]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q2[10]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q2[12]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q2[14]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q3, q0
; CHECKBE-NEXT:    vmov.u8 r0, q2[1]
; CHECKBE-NEXT:    vmov.8 q1[0], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[0]
; CHECKBE-NEXT:    vmov.8 q1[1], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[3]
; CHECKBE-NEXT:    vmov.8 q1[2], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[2]
; CHECKBE-NEXT:    vmov.8 q1[3], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[5]
; CHECKBE-NEXT:    vmov.8 q1[4], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[4]
; CHECKBE-NEXT:    vmov.8 q1[5], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[7]
; CHECKBE-NEXT:    vmov.8 q1[6], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[6]
; CHECKBE-NEXT:    vmov.8 q1[7], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[9]
; CHECKBE-NEXT:    vmov.8 q1[8], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[8]
; CHECKBE-NEXT:    vmov.8 q1[9], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[11]
; CHECKBE-NEXT:    vmov.8 q1[10], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[10]
; CHECKBE-NEXT:    vmov.8 q1[11], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[13]
; CHECKBE-NEXT:    vmov.8 q1[12], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[12]
; CHECKBE-NEXT:    vmov.8 q1[13], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[15]
; CHECKBE-NEXT:    vmov.8 q1[14], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[14]
; CHECKBE-NEXT:    vmov.8 q1[15], r0
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  ret <16 x i8> %out
}

; <16 x i8> shuffle with mask <1, 16, 3, 18, ..., 15, 30>: odd lanes of %src1
; in the even result lanes, even lanes of %src2 in the odd result lanes.
; No vmovn is expected for this mask: every configuration moves the sixteen
; bytes one at a time through r0 (vmov.u8 / vmov.8).
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[1]
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q2[3]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q2[5]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q2[7]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q2[9]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q2[11]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q2[13]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q2[15]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q3, q0
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    vmov.u8 r0, q3[1]
; CHECKBE-NEXT:    vmov.8 q2[0], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[0]
; CHECKBE-NEXT:    vmov.8 q2[1], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[3]
; CHECKBE-NEXT:    vmov.8 q2[2], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[2]
; CHECKBE-NEXT:    vmov.8 q2[3], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[5]
; CHECKBE-NEXT:    vmov.8 q2[4], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[4]
; CHECKBE-NEXT:    vmov.8 q2[5], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[7]
; CHECKBE-NEXT:    vmov.8 q2[6], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[6]
; CHECKBE-NEXT:    vmov.8 q2[7], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[9]
; CHECKBE-NEXT:    vmov.8 q2[8], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[8]
; CHECKBE-NEXT:    vmov.8 q2[9], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[11]
; CHECKBE-NEXT:    vmov.8 q2[10], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[10]
; CHECKBE-NEXT:    vmov.8 q2[11], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[13]
; CHECKBE-NEXT:    vmov.8 q2[12], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[12]
; CHECKBE-NEXT:    vmov.8 q2[13], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[15]
; CHECKBE-NEXT:    vmov.8 q2[14], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[14]
; CHECKBE-NEXT:    vmov.8 q2[15], r0
; CHECKBE-NEXT:    vrev64.8 q0, q2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  ret <16 x i8> %out
}

; vmovn8_t4: two-input <16 x i8> shuffle with mask <16, 1, 18, 3, ...>.
; Even result lanes take the even lanes of %src2 and odd result lanes take the
; odd lanes of %src1. The thumbv8.1m runs expect a single vmovnb.i16; the
; thumbebv8.1m run expects the same instruction bracketed by vrev64.8 on both
; inputs and on the result.
880define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
881; CHECK-LABEL: vmovn8_t4:
882; CHECK:       @ %bb.0: @ %entry
883; CHECK-NEXT:    vmovnb.i16 q0, q1
884; CHECK-NEXT:    bx lr
885;
886; CHECKBE-LABEL: vmovn8_t4:
887; CHECKBE:       @ %bb.0: @ %entry
888; CHECKBE-NEXT:    vrev64.8 q2, q1
889; CHECKBE-NEXT:    vrev64.8 q1, q0
890; CHECKBE-NEXT:    vmovnb.i16 q1, q2
891; CHECKBE-NEXT:    vrev64.8 q0, q1
892; CHECKBE-NEXT:    bx lr
893entry:
894  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
895  ret <16 x i8> %out
896}
897
; vmovn8_single_t: single-input <16 x i8> shuffle (second operand is undef)
; with mask <0, 0, 2, 2, ...>, i.e. every even lane of %src1 is duplicated into
; the odd lane that follows it. The expected output is one vmovnt.i16 that uses
; the same register as both source and destination.
898define arm_aapcs_vfpcc <16 x i8> @vmovn8_single_t(<16 x i8> %src1) {
899; CHECK-LABEL: vmovn8_single_t:
900; CHECK:       @ %bb.0: @ %entry
901; CHECK-NEXT:    vmovnt.i16 q0, q0
902; CHECK-NEXT:    bx lr
903;
904; CHECKBE-LABEL: vmovn8_single_t:
905; CHECKBE:       @ %bb.0: @ %entry
906; CHECKBE-NEXT:    vrev64.8 q1, q0
907; CHECKBE-NEXT:    vmovnt.i16 q1, q1
908; CHECKBE-NEXT:    vrev64.8 q0, q1
909; CHECKBE-NEXT:    bx lr
910entry:
911  %out = shufflevector <16 x i8> %src1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
912  ret <16 x i8> %out
913}
914
915
; vmovn32trunct_undef2: interleave-then-truncate where the second shuffle
; operand is a reinterpret of undef. %a is reinterpreted as <4 x i32>,
; interleaved lane by lane with the undef vector (mask <0, 4, 1, 5, ...>) and
; truncated to <8 x i16>, so only the even result lanes carry defined data.
; Both run configurations expect nothing but the return.
916define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef2(<8 x i16> %a) {
917; CHECK-LABEL: vmovn32trunct_undef2:
918; CHECK:       @ %bb.0: @ %entry
919; CHECK-NEXT:    bx lr
920;
921; CHECKBE-LABEL: vmovn32trunct_undef2:
922; CHECKBE:       @ %bb.0: @ %entry
923; CHECKBE-NEXT:    bx lr
924entry:
925  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
926  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
927  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
928  %out = trunc <8 x i32> %strided.vec to <8 x i16>
929  ret <8 x i16> %out
930}
931
; vmovn32trunct_undef1: same interleave-then-truncate shape as
; vmovn32trunct_undef2, but here the FIRST shuffle operand is the reinterpret of
; undef and the second comes from %a. The defined data therefore lands in the
; odd result lanes, and the expected output is a vmovnt.i32 with the same
; register as source and destination.
932define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef1(<8 x i16> %a) {
933; CHECK-LABEL: vmovn32trunct_undef1:
934; CHECK:       @ %bb.0: @ %entry
935; CHECK-NEXT:    vmovnt.i32 q0, q0
936; CHECK-NEXT:    bx lr
937;
938; CHECKBE-LABEL: vmovn32trunct_undef1:
939; CHECKBE:       @ %bb.0: @ %entry
940; CHECKBE-NEXT:    vrev64.16 q1, q0
941; CHECKBE-NEXT:    vmovnt.i32 q1, q1
942; CHECKBE-NEXT:    vrev64.16 q0, q1
943; CHECKBE-NEXT:    bx lr
944entry:
945  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
946  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
947  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
948  %out = trunc <8 x i32> %strided.vec to <8 x i16>
949  ret <8 x i16> %out
950}
951
; vmovn16b_undef2: <8 x i16> shuffle with mask <0, 9, 2, 11, ...> where the
; first operand is %a reinterpreted as <8 x i16> and the second is a
; reinterpret of undef. Even result lanes keep the matching lanes of the first
; operand; odd result lanes come from the undef operand. The thumbv8.1m runs
; expect only the return; the thumbebv8.1m run expects a vrev64.8 followed by a
; vrev64.16 and no vmovn.
952define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef2(<16 x i8> %a) {
953; CHECK-LABEL: vmovn16b_undef2:
954; CHECK:       @ %bb.0: @ %entry
955; CHECK-NEXT:    bx lr
956;
957; CHECKBE-LABEL: vmovn16b_undef2:
958; CHECKBE:       @ %bb.0: @ %entry
959; CHECKBE-NEXT:    vrev64.8 q1, q0
960; CHECKBE-NEXT:    vrev64.16 q0, q1
961; CHECKBE-NEXT:    bx lr
962entry:
963  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
964  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
965  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
966  ret <8 x i16> %out
967}
968
; vmovn16b_undef1: mirror of vmovn16b_undef2 with the operands swapped. The
; first shuffle operand is the reinterpret of undef and the second is %a
; reinterpreted as <8 x i16>. With mask <0, 9, 2, 11, ...> the odd result lanes
; take the same-numbered lanes (1, 3, 5, 7) of the second operand and the even
; lanes come from the undef operand. The expected output matches
; vmovn16b_undef2: only the return for thumbv8.1m, two vrev64 for thumbebv8.1m.
969define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef1(<16 x i8> %a) {
970; CHECK-LABEL: vmovn16b_undef1:
971; CHECK:       @ %bb.0: @ %entry
972; CHECK-NEXT:    bx lr
973;
974; CHECKBE-LABEL: vmovn16b_undef1:
975; CHECKBE:       @ %bb.0: @ %entry
976; CHECKBE-NEXT:    vrev64.8 q1, q0
977; CHECKBE-NEXT:    vrev64.16 q0, q1
978; CHECKBE-NEXT:    bx lr
979entry:
980  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
981  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
982  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
983  ret <8 x i16> %out
984}
985
; vmovn32_badlanes: shuffle-then-truncate whose mask is <4, 0, 5, 1, 6, 1, 7, 2>
; with an undef second operand. The odd result lanes read lanes 0, 1, 1, 2 of
; %src1 (lane 1 is used twice), so they are not the in-order 0, 1, 2, 3
; sequence used by the other trunc tests in this file. The expected output
; builds the result with individual vmov.16 lane inserts and contains no vmovn.
; The two thumbv8.1m runs use separate prefixes here because their expected
; register assignments differ.
986define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
987; CHECK-MVE-LABEL: vmovn32_badlanes:
988; CHECK-MVE:       @ %bb.0: @ %entry
989; CHECK-MVE-NEXT:    vmov r0, r1, d0
990; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
991; CHECK-MVE-NEXT:    vmov r0, s2
992; CHECK-MVE-NEXT:    vmov.16 q1[3], r1
993; CHECK-MVE-NEXT:    vmov.16 q1[5], r1
994; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
995; CHECK-MVE-NEXT:    vmov q0, q1
996; CHECK-MVE-NEXT:    bx lr
997;
998; CHECK-MVEFP-LABEL: vmovn32_badlanes:
999; CHECK-MVEFP:       @ %bb.0: @ %entry
1000; CHECK-MVEFP-NEXT:    vmov r1, r2, d0
1001; CHECK-MVEFP-NEXT:    vmov r0, s2
1002; CHECK-MVEFP-NEXT:    vmov.16 q0[1], r1
1003; CHECK-MVEFP-NEXT:    vmov.16 q0[3], r2
1004; CHECK-MVEFP-NEXT:    vmov.16 q0[5], r2
1005; CHECK-MVEFP-NEXT:    vmov.16 q0[7], r0
1006; CHECK-MVEFP-NEXT:    bx lr
1007;
1008; CHECKBE-LABEL: vmovn32_badlanes:
1009; CHECKBE:       @ %bb.0: @ %entry
1010; CHECKBE-NEXT:    vrev64.32 q1, q0
1011; CHECKBE-NEXT:    vmov r0, r1, d2
1012; CHECKBE-NEXT:    vmov r2, s6
1013; CHECKBE-NEXT:    vmov.16 q1[1], r0
1014; CHECKBE-NEXT:    vmov.16 q1[3], r1
1015; CHECKBE-NEXT:    vmov.16 q1[5], r1
1016; CHECKBE-NEXT:    vmov.16 q1[7], r2
1017; CHECKBE-NEXT:    vrev64.16 q0, q1
1018; CHECKBE-NEXT:    bx lr
1019entry:
1020  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 1, i32 7, i32 2>
1021  %out = trunc <8 x i32> %strided.vec to <8 x i16>
1022  ret <8 x i16> %out
1023}
1024
; vmovn16trunct_undef2: 16-bit-to-8-bit counterpart of vmovn32trunct_undef2.
; %a is reinterpreted as <8 x i16>, interleaved with a reinterpret of undef
; (mask <0, 8, 1, 9, ...>) and truncated to <16 x i8>, so only the even result
; lanes carry defined data. Both run configurations expect nothing but the
; return.
1025define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef2(<16 x i8> %a) {
1026; CHECK-LABEL: vmovn16trunct_undef2:
1027; CHECK:       @ %bb.0: @ %entry
1028; CHECK-NEXT:    bx lr
1029;
1030; CHECKBE-LABEL: vmovn16trunct_undef2:
1031; CHECKBE:       @ %bb.0: @ %entry
1032; CHECKBE-NEXT:    bx lr
1033entry:
1034  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
1035  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
1036  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1037  %out = trunc <16 x i16> %strided.vec to <16 x i8>
1038  ret <16 x i8> %out
1039}
1040
; vmovn16trunct_undef1: 16-bit-to-8-bit counterpart of vmovn32trunct_undef1.
; The first shuffle operand is a reinterpret of undef and the second comes from
; %a, so after the interleave (mask <0, 8, 1, 9, ...>) and truncate the defined
; data sits in the odd result lanes. The expected output is a vmovnt.i16 with
; the same register as source and destination.
1041define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef1(<16 x i8> %a) {
1042; CHECK-LABEL: vmovn16trunct_undef1:
1043; CHECK:       @ %bb.0: @ %entry
1044; CHECK-NEXT:    vmovnt.i16 q0, q0
1045; CHECK-NEXT:    bx lr
1046;
1047; CHECKBE-LABEL: vmovn16trunct_undef1:
1048; CHECKBE:       @ %bb.0: @ %entry
1049; CHECKBE-NEXT:    vrev64.8 q1, q0
1050; CHECKBE-NEXT:    vmovnt.i16 q1, q1
1051; CHECKBE-NEXT:    vrev64.8 q0, q1
1052; CHECKBE-NEXT:    bx lr
1053entry:
1054  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
1055  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
1056  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1057  %out = trunc <16 x i16> %strided.vec to <16 x i8>
1058  ret <16 x i8> %out
1059}
1060
; Declarations of the two MVE vreinterpretq intrinsic overloads called by the
; *_undef1 / *_undef2 tests in this file (<8 x i16> -> <4 x i32> and
; <16 x i8> -> <8 x i16>).
1061declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
1062declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)
1063