xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vmovnstore.ll (revision 8998ff53c91687b1065d095f6ac0ad7578131d73)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
4
; Interleave %src1 and %src2 (src1 in the even result lanes, src2 in the odd
; lanes), truncate every i32 lane to i16 and store the <8 x i16> result to
; %dest. The autogenerated assertions expect a single vmovnt.i32 q0, q1
; followed by the vector store.
5define arm_aapcs_vfpcc void @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
6; CHECK-LABEL: vmovn32_trunc1:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    vmovnt.i32 q0, q1
9; CHECK-NEXT:    vstrw.32 q0, [r0]
10; CHECK-NEXT:    bx lr
11entry:
12  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
13  %out = trunc <8 x i32> %strided.vec to <8 x i16>
14  store <8 x i16> %out, ptr %dest, align 8
15  ret void
16}
17
; Same as the interleave-and-truncate pattern above but with the operands
; swapped in the mask: %src2 feeds the even result lanes and %src1 the odd
; lanes. The autogenerated assertions expect vmovnt.i32 q1, q1 <- q0 form,
; i.e. vmovnt.i32 q1, q0, and a store of q1.
18define arm_aapcs_vfpcc void @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
19; CHECK-LABEL: vmovn32_trunc2:
20; CHECK:       @ %bb.0: @ %entry
21; CHECK-NEXT:    vmovnt.i32 q1, q0
22; CHECK-NEXT:    vstrw.32 q1, [r0]
23; CHECK-NEXT:    bx lr
24entry:
25  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
26  %out = trunc <8 x i32> %strided.vec to <8 x i16>
27  store <8 x i16> %out, ptr %dest, align 8
28  ret void
29}
30
; Single-source variant: the mask interleaves the low half (lanes 0-3) and the
; high half (lanes 4-7) of the one <8 x i32> input, low half first. Every mask
; index is below 8, so the undef second operand is never selected. The result
; is truncated to <8 x i16> and stored; the autogenerated assertions expect
; vmovnt.i32 q0, q1 followed by a store of q0.
31define arm_aapcs_vfpcc void @vmovn32_trunc1_onesrc(<8 x i32> %src1, ptr %dest) {
32; CHECK-LABEL: vmovn32_trunc1_onesrc:
33; CHECK:       @ %bb.0: @ %entry
34; CHECK-NEXT:    vmovnt.i32 q0, q1
35; CHECK-NEXT:    vstrw.32 q0, [r0]
36; CHECK-NEXT:    bx lr
37entry:
38  %strided.vec = shufflevector <8 x i32> %src1, <8 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
39  %out = trunc <8 x i32> %strided.vec to <8 x i16>
40  store <8 x i16> %out, ptr %dest, align 8
41  ret void
42}
43
; Single-source variant with the halves swapped: the high half (lanes 4-7) of
; the <8 x i32> input feeds the even result lanes and the low half (lanes 0-3)
; the odd lanes. The undef second operand is never selected. After the
; truncate to <8 x i16>, the autogenerated assertions expect
; vmovnt.i32 q1, q0 followed by a store of q1.
44define arm_aapcs_vfpcc void @vmovn32_trunc2_onesrc(<8 x i32> %src1, ptr %dest) {
45; CHECK-LABEL: vmovn32_trunc2_onesrc:
46; CHECK:       @ %bb.0: @ %entry
47; CHECK-NEXT:    vmovnt.i32 q1, q0
48; CHECK-NEXT:    vstrw.32 q1, [r0]
49; CHECK-NEXT:    bx lr
50entry:
51  %strided.vec = shufflevector <8 x i32> %src1, <8 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
52  %out = trunc <8 x i32> %strided.vec to <8 x i16>
53  store <8 x i16> %out, ptr %dest, align 8
54  ret void
55}
56
; 16-bit flavour of the interleave-and-truncate test: %src1 lanes go to the
; even result lanes, %src2 lanes to the odd lanes, then every i16 lane is
; truncated to i8 and the <16 x i8> result is stored to %dest. The
; autogenerated assertions expect vmovnt.i16 q0, q1 followed by the store.
57define arm_aapcs_vfpcc void @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
58; CHECK-LABEL: vmovn16_trunc1:
59; CHECK:       @ %bb.0: @ %entry
60; CHECK-NEXT:    vmovnt.i16 q0, q1
61; CHECK-NEXT:    vstrw.32 q0, [r0]
62; CHECK-NEXT:    bx lr
63entry:
64  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
65  %out = trunc <16 x i16> %strided.vec to <16 x i8>
66  store <16 x i8> %out, ptr %dest, align 8
67  ret void
68}
69
; 16-bit interleave-and-truncate with swapped operands: %src2 lanes go to the
; even result lanes and %src1 lanes to the odd lanes before the truncate to
; <16 x i8>. The autogenerated assertions expect vmovnt.i16 q1, q0 followed
; by a store of q1.
70define arm_aapcs_vfpcc void @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
71; CHECK-LABEL: vmovn16_trunc2:
72; CHECK:       @ %bb.0: @ %entry
73; CHECK-NEXT:    vmovnt.i16 q1, q0
74; CHECK-NEXT:    vstrw.32 q1, [r0]
75; CHECK-NEXT:    bx lr
76entry:
77  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
78  %out = trunc <16 x i16> %strided.vec to <16 x i8>
79  store <16 x i8> %out, ptr %dest, align 8
80  ret void
81}
82
; Single-source 16-bit variant: interleaves the low half (lanes 0-7) and the
; high half (lanes 8-15) of the one <16 x i16> input, low half first. Every
; mask index is below 16, so the undef second operand is never selected. The
; result is truncated to <16 x i8> and stored; the autogenerated assertions
; expect vmovnt.i16 q0, q1 followed by a store of q0.
83define arm_aapcs_vfpcc void @vmovn16_trunc1_onesrc(<16 x i16> %src1, ptr %dest) {
84; CHECK-LABEL: vmovn16_trunc1_onesrc:
85; CHECK:       @ %bb.0: @ %entry
86; CHECK-NEXT:    vmovnt.i16 q0, q1
87; CHECK-NEXT:    vstrw.32 q0, [r0]
88; CHECK-NEXT:    bx lr
89entry:
90  %strided.vec = shufflevector <16 x i16> %src1, <16 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
91  %out = trunc <16 x i16> %strided.vec to <16 x i8>
92  store <16 x i8> %out, ptr %dest, align 8
93  ret void
94}
95
; Single-source 16-bit variant with the halves swapped: the high half (lanes
; 8-15) of the <16 x i16> input feeds the even result lanes and the low half
; (lanes 0-7) the odd lanes. The undef second operand is never selected.
; After the truncate to <16 x i8>, the autogenerated assertions expect
; vmovnt.i16 q1, q0 followed by a store of q1.
96define arm_aapcs_vfpcc void @vmovn16_trunc2_onesrc(<16 x i16> %src1, ptr %dest) {
97; CHECK-LABEL: vmovn16_trunc2_onesrc:
98; CHECK:       @ %bb.0: @ %entry
99; CHECK-NEXT:    vmovnt.i16 q1, q0
100; CHECK-NEXT:    vstrw.32 q1, [r0]
101; CHECK-NEXT:    bx lr
102entry:
103  %strided.vec = shufflevector <16 x i16> %src1, <16 x i16> undef, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
104  %out = trunc <16 x i16> %strided.vec to <16 x i8>
105  store <16 x i8> %out, ptr %dest, align 8
106  ret void
107}
108
109
; 64-bit lanes, no truncate: the result is { %src1[0], %src2[0] } stored as
; <2 x i64>. The autogenerated assertions expect no VMOVN here, only two
; vmov.f32 copies (s2 <- s4, s3 <- s5) followed by a store of q0.
110define arm_aapcs_vfpcc void @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
111; CHECK-LABEL: vmovn64_t1:
112; CHECK:       @ %bb.0: @ %entry
113; CHECK-NEXT:    vmov.f32 s2, s4
114; CHECK-NEXT:    vmov.f32 s3, s5
115; CHECK-NEXT:    vstrw.32 q0, [r0]
116; CHECK-NEXT:    bx lr
117entry:
118  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
119  store <2 x i64> %out, ptr %dest, align 8
120  ret void
121}
122
; 64-bit lanes with swapped operands: the result is { %src2[0], %src1[0] }.
; The autogenerated assertions expect two vmov.f32 copies (s6 <- s0,
; s7 <- s1) followed by a store of q1.
123define arm_aapcs_vfpcc void @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
124; CHECK-LABEL: vmovn64_t2:
125; CHECK:       @ %bb.0: @ %entry
126; CHECK-NEXT:    vmov.f32 s6, s0
127; CHECK-NEXT:    vmov.f32 s7, s1
128; CHECK-NEXT:    vstrw.32 q1, [r0]
129; CHECK-NEXT:    bx lr
130entry:
131  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
132  store <2 x i64> %out, ptr %dest, align 8
133  ret void
134}
135
; 64-bit lanes: the result is { %src1[0], %src2[1] }, i.e. each source lane
; stays in its own position. The autogenerated assertions expect two
; vmov.f32 copies (s2 <- s6, s3 <- s7) followed by a store of q0.
136define arm_aapcs_vfpcc void @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
137; CHECK-LABEL: vmovn64_b1:
138; CHECK:       @ %bb.0: @ %entry
139; CHECK-NEXT:    vmov.f32 s2, s6
140; CHECK-NEXT:    vmov.f32 s3, s7
141; CHECK-NEXT:    vstrw.32 q0, [r0]
142; CHECK-NEXT:    bx lr
143entry:
144  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
145  store <2 x i64> %out, ptr %dest, align 8
146  ret void
147}
148
; 64-bit lanes: the result is { %src2[1], %src1[0] }, so both selected lanes
; change position. The autogenerated assertions expect four vmov.f32 copies
; building the value in q1, followed by a store of q1.
149define arm_aapcs_vfpcc void @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
150; CHECK-LABEL: vmovn64_b2:
151; CHECK:       @ %bb.0: @ %entry
152; CHECK-NEXT:    vmov.f32 s4, s6
153; CHECK-NEXT:    vmov.f32 s6, s0
154; CHECK-NEXT:    vmov.f32 s5, s7
155; CHECK-NEXT:    vmov.f32 s7, s1
156; CHECK-NEXT:    vstrw.32 q1, [r0]
157; CHECK-NEXT:    bx lr
158entry:
159  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
160  store <2 x i64> %out, ptr %dest, align 8
161  ret void
162}
163
; 64-bit lanes: the result is { %src1[1], %src2[0] }, so both selected lanes
; change position. The autogenerated assertions expect four vmov.f32 copies
; building the value in q0, followed by a store of q0.
164define arm_aapcs_vfpcc void @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
165; CHECK-LABEL: vmovn64_b3:
166; CHECK:       @ %bb.0: @ %entry
167; CHECK-NEXT:    vmov.f32 s0, s2
168; CHECK-NEXT:    vmov.f32 s2, s4
169; CHECK-NEXT:    vmov.f32 s1, s3
170; CHECK-NEXT:    vmov.f32 s3, s5
171; CHECK-NEXT:    vstrw.32 q0, [r0]
172; CHECK-NEXT:    bx lr
173entry:
174  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
175  store <2 x i64> %out, ptr %dest, align 8
176  ret void
177}
178
; 64-bit lanes: the result is { %src2[0], %src1[1] }, i.e. each source lane
; stays in its own position. The autogenerated assertions expect two
; vmov.f32 copies (s6 <- s2, s7 <- s3) followed by a store of q1.
179define arm_aapcs_vfpcc void @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
180; CHECK-LABEL: vmovn64_b4:
181; CHECK:       @ %bb.0: @ %entry
182; CHECK-NEXT:    vmov.f32 s6, s2
183; CHECK-NEXT:    vmov.f32 s7, s3
184; CHECK-NEXT:    vstrw.32 q1, [r0]
185; CHECK-NEXT:    bx lr
186entry:
187  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
188  store <2 x i64> %out, ptr %dest, align 8
189  ret void
190}
191
192
193
; 32-bit lanes, no truncate: the even lanes of %src1 stay in the even result
; lanes and the even lanes of %src2 are placed in the odd result lanes. The
; autogenerated assertions expect two vmov.f32 copies (s1 <- s4, s3 <- s6)
; followed by a store of q0, not a VMOVN.
194define arm_aapcs_vfpcc void @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
195; CHECK-LABEL: vmovn32_t1:
196; CHECK:       @ %bb.0: @ %entry
197; CHECK-NEXT:    vmov.f32 s1, s4
198; CHECK-NEXT:    vmov.f32 s3, s6
199; CHECK-NEXT:    vstrw.32 q0, [r0]
200; CHECK-NEXT:    bx lr
201entry:
202  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
203  store <4 x i32> %out, ptr %dest, align 8
204  ret void
205}
206
; 32-bit lanes with swapped operands: the even lanes of %src2 stay in the
; even result lanes and the even lanes of %src1 are placed in the odd result
; lanes. The autogenerated assertions expect two vmov.f32 copies
; (s5 <- s0, s7 <- s2) followed by a store of q1.
207define arm_aapcs_vfpcc void @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
208; CHECK-LABEL: vmovn32_t2:
209; CHECK:       @ %bb.0: @ %entry
210; CHECK-NEXT:    vmov.f32 s5, s0
211; CHECK-NEXT:    vmov.f32 s7, s2
212; CHECK-NEXT:    vstrw.32 q1, [r0]
213; CHECK-NEXT:    bx lr
214entry:
215  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
216  store <4 x i32> %out, ptr %dest, align 8
217  ret void
218}
219
; 32-bit lanes: even result lanes come from the even lanes of %src1 and odd
; result lanes from the odd lanes of %src2, so no lane changes position. The
; autogenerated assertions expect two vmov.f32 copies (s1 <- s5, s3 <- s7)
; followed by a store of q0.
220define arm_aapcs_vfpcc void @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
221; CHECK-LABEL: vmovn32_b1:
222; CHECK:       @ %bb.0: @ %entry
223; CHECK-NEXT:    vmov.f32 s1, s5
224; CHECK-NEXT:    vmov.f32 s3, s7
225; CHECK-NEXT:    vstrw.32 q0, [r0]
226; CHECK-NEXT:    bx lr
227entry:
228  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
229  store <4 x i32> %out, ptr %dest, align 8
230  ret void
231}
232
; 32-bit lanes: the odd lanes of %src2 move down into the even result lanes
; and the even lanes of %src1 move up into the odd result lanes, so every
; selected lane changes position. The autogenerated assertions expect four
; vmov.f32 copies building the value in q1, followed by a store of q1.
233define arm_aapcs_vfpcc void @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
234; CHECK-LABEL: vmovn32_b2:
235; CHECK:       @ %bb.0: @ %entry
236; CHECK-NEXT:    vmov.f32 s4, s5
237; CHECK-NEXT:    vmov.f32 s6, s7
238; CHECK-NEXT:    vmov.f32 s5, s0
239; CHECK-NEXT:    vmov.f32 s7, s2
240; CHECK-NEXT:    vstrw.32 q1, [r0]
241; CHECK-NEXT:    bx lr
242entry:
243  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
244  store <4 x i32> %out, ptr %dest, align 8
245  ret void
246}
247
; 32-bit lanes: the odd lanes of %src1 move down into the even result lanes
; and the even lanes of %src2 move up into the odd result lanes, so every
; selected lane changes position. The autogenerated assertions expect four
; vmov.f32 copies building the value in q0, followed by a store of q0.
248define arm_aapcs_vfpcc void @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
249; CHECK-LABEL: vmovn32_b3:
250; CHECK:       @ %bb.0: @ %entry
251; CHECK-NEXT:    vmov.f32 s0, s1
252; CHECK-NEXT:    vmov.f32 s2, s3
253; CHECK-NEXT:    vmov.f32 s1, s4
254; CHECK-NEXT:    vmov.f32 s3, s6
255; CHECK-NEXT:    vstrw.32 q0, [r0]
256; CHECK-NEXT:    bx lr
257entry:
258  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
259  store <4 x i32> %out, ptr %dest, align 8
260  ret void
261}
262
; 32-bit lanes: even result lanes come from the even lanes of %src2 and odd
; result lanes from the odd lanes of %src1, so no lane changes position. The
; autogenerated assertions expect two vmov.f32 copies (s5 <- s1, s7 <- s3)
; followed by a store of q1.
263define arm_aapcs_vfpcc void @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
264; CHECK-LABEL: vmovn32_b4:
265; CHECK:       @ %bb.0: @ %entry
266; CHECK-NEXT:    vmov.f32 s5, s1
267; CHECK-NEXT:    vmov.f32 s7, s3
268; CHECK-NEXT:    vstrw.32 q1, [r0]
269; CHECK-NEXT:    bx lr
270entry:
271  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
272  store <4 x i32> %out, ptr %dest, align 8
273  ret void
274}
275
276
277
278
; 16-bit lanes, pure shuffle (no truncate): the even lanes of %src1 stay in
; the even result lanes and the even lanes of %src2 are placed in the odd
; result lanes. The autogenerated assertions expect a single
; vmovnt.i32 q0, q1 followed by a store of q0.
279define arm_aapcs_vfpcc void @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
280; CHECK-LABEL: vmovn16_t1:
281; CHECK:       @ %bb.0: @ %entry
282; CHECK-NEXT:    vmovnt.i32 q0, q1
283; CHECK-NEXT:    vstrw.32 q0, [r0]
284; CHECK-NEXT:    bx lr
285entry:
286  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
287  store <8 x i16> %out, ptr %dest, align 8
288  ret void
289}
290
; 16-bit lanes with swapped operands: the even lanes of %src2 stay in the
; even result lanes and the even lanes of %src1 are placed in the odd result
; lanes. The autogenerated assertions expect a single vmovnt.i32 q1, q0
; followed by a store of q1.
291define arm_aapcs_vfpcc void @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
292; CHECK-LABEL: vmovn16_t2:
293; CHECK:       @ %bb.0: @ %entry
294; CHECK-NEXT:    vmovnt.i32 q1, q0
295; CHECK-NEXT:    vstrw.32 q1, [r0]
296; CHECK-NEXT:    bx lr
297entry:
298  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
299  store <8 x i16> %out, ptr %dest, align 8
300  ret void
301}
302
; 16-bit lanes: even result lanes come from the even lanes of %src1 and odd
; result lanes from the odd lanes of %src2, so no lane changes position. The
; autogenerated assertions expect a single vmovnb.i32 q1, q0 followed by a
; store of q1.
303define arm_aapcs_vfpcc void @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
304; CHECK-LABEL: vmovn16_b1:
305; CHECK:       @ %bb.0: @ %entry
306; CHECK-NEXT:    vmovnb.i32 q1, q0
307; CHECK-NEXT:    vstrw.32 q1, [r0]
308; CHECK-NEXT:    bx lr
309entry:
310  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
311  store <8 x i16> %out, ptr %dest, align 8
312  ret void
313}
314
; 16-bit lanes: the odd lanes of %src2 move down into the even result lanes
; and the even lanes of %src1 move up into the odd result lanes. The
; autogenerated assertions show no single VMOVN for this mask and differ per
; configuration: the MVE-only run expects a lane-by-lane rebuild through r1
; into q2 (vmov.u16 / vmov.16 pairs), while the run that adds fullfp16
; expects four vmovx.f16 plus four vins.f16 operating in place on q1.
315define arm_aapcs_vfpcc void @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
316; CHECK-MVE-LABEL: vmovn16_b2:
317; CHECK-MVE:       @ %bb.0: @ %entry
318; CHECK-MVE-NEXT:    vmov.u16 r1, q1[1]
319; CHECK-MVE-NEXT:    vmov.16 q2[0], r1
320; CHECK-MVE-NEXT:    vmov.u16 r1, q0[0]
321; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
322; CHECK-MVE-NEXT:    vmov.u16 r1, q1[3]
323; CHECK-MVE-NEXT:    vmov.16 q2[2], r1
324; CHECK-MVE-NEXT:    vmov.u16 r1, q0[2]
325; CHECK-MVE-NEXT:    vmov.16 q2[3], r1
326; CHECK-MVE-NEXT:    vmov.u16 r1, q1[5]
327; CHECK-MVE-NEXT:    vmov.16 q2[4], r1
328; CHECK-MVE-NEXT:    vmov.u16 r1, q0[4]
329; CHECK-MVE-NEXT:    vmov.16 q2[5], r1
330; CHECK-MVE-NEXT:    vmov.u16 r1, q1[7]
331; CHECK-MVE-NEXT:    vmov.16 q2[6], r1
332; CHECK-MVE-NEXT:    vmov.u16 r1, q0[6]
333; CHECK-MVE-NEXT:    vmov.16 q2[7], r1
334; CHECK-MVE-NEXT:    vstrw.32 q2, [r0]
335; CHECK-MVE-NEXT:    bx lr
336;
337; CHECK-MVEFP-LABEL: vmovn16_b2:
338; CHECK-MVEFP:       @ %bb.0: @ %entry
339; CHECK-MVEFP-NEXT:    vmovx.f16 s4, s4
340; CHECK-MVEFP-NEXT:    vmovx.f16 s5, s5
341; CHECK-MVEFP-NEXT:    vmovx.f16 s6, s6
342; CHECK-MVEFP-NEXT:    vmovx.f16 s7, s7
343; CHECK-MVEFP-NEXT:    vins.f16 s4, s0
344; CHECK-MVEFP-NEXT:    vins.f16 s5, s1
345; CHECK-MVEFP-NEXT:    vins.f16 s6, s2
346; CHECK-MVEFP-NEXT:    vins.f16 s7, s3
347; CHECK-MVEFP-NEXT:    vstrw.32 q1, [r0]
348; CHECK-MVEFP-NEXT:    bx lr
349entry:
350  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
351  store <8 x i16> %out, ptr %dest, align 8
352  ret void
353}
354
; 16-bit lanes: the odd lanes of %src1 move down into the even result lanes
; and the even lanes of %src2 move up into the odd result lanes. The
; autogenerated assertions show no single VMOVN for this mask and differ per
; configuration: the MVE-only run expects a lane-by-lane rebuild through r1
; into q2 (vmov.u16 / vmov.16 pairs), while the run that adds fullfp16
; expects four vmovx.f16 plus four vins.f16 operating in place on q0.
355define arm_aapcs_vfpcc void @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
356; CHECK-MVE-LABEL: vmovn16_b3:
357; CHECK-MVE:       @ %bb.0: @ %entry
358; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
359; CHECK-MVE-NEXT:    vmov.16 q2[0], r1
360; CHECK-MVE-NEXT:    vmov.u16 r1, q1[0]
361; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
362; CHECK-MVE-NEXT:    vmov.u16 r1, q0[3]
363; CHECK-MVE-NEXT:    vmov.16 q2[2], r1
364; CHECK-MVE-NEXT:    vmov.u16 r1, q1[2]
365; CHECK-MVE-NEXT:    vmov.16 q2[3], r1
366; CHECK-MVE-NEXT:    vmov.u16 r1, q0[5]
367; CHECK-MVE-NEXT:    vmov.16 q2[4], r1
368; CHECK-MVE-NEXT:    vmov.u16 r1, q1[4]
369; CHECK-MVE-NEXT:    vmov.16 q2[5], r1
370; CHECK-MVE-NEXT:    vmov.u16 r1, q0[7]
371; CHECK-MVE-NEXT:    vmov.16 q2[6], r1
372; CHECK-MVE-NEXT:    vmov.u16 r1, q1[6]
373; CHECK-MVE-NEXT:    vmov.16 q2[7], r1
374; CHECK-MVE-NEXT:    vstrw.32 q2, [r0]
375; CHECK-MVE-NEXT:    bx lr
376;
377; CHECK-MVEFP-LABEL: vmovn16_b3:
378; CHECK-MVEFP:       @ %bb.0: @ %entry
379; CHECK-MVEFP-NEXT:    vmovx.f16 s0, s0
380; CHECK-MVEFP-NEXT:    vmovx.f16 s1, s1
381; CHECK-MVEFP-NEXT:    vmovx.f16 s2, s2
382; CHECK-MVEFP-NEXT:    vmovx.f16 s3, s3
383; CHECK-MVEFP-NEXT:    vins.f16 s0, s4
384; CHECK-MVEFP-NEXT:    vins.f16 s1, s5
385; CHECK-MVEFP-NEXT:    vins.f16 s2, s6
386; CHECK-MVEFP-NEXT:    vins.f16 s3, s7
387; CHECK-MVEFP-NEXT:    vstrw.32 q0, [r0]
388; CHECK-MVEFP-NEXT:    bx lr
389entry:
390  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
391  store <8 x i16> %out, ptr %dest, align 8
392  ret void
393}
394
; 16-bit lanes: even result lanes come from the even lanes of %src2 and odd
; result lanes from the odd lanes of %src1, so no lane changes position. The
; autogenerated assertions expect a single vmovnb.i32 q0, q1 followed by a
; store of q0.
395define arm_aapcs_vfpcc void @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
396; CHECK-LABEL: vmovn16_b4:
397; CHECK:       @ %bb.0: @ %entry
398; CHECK-NEXT:    vmovnb.i32 q0, q1
399; CHECK-NEXT:    vstrw.32 q0, [r0]
400; CHECK-NEXT:    bx lr
401entry:
402  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
403  store <8 x i16> %out, ptr %dest, align 8
404  ret void
405}
406
407
; 8-bit lanes, pure shuffle: the even lanes of %src1 stay in the even result
; lanes and the even lanes of %src2 are placed in the odd result lanes. The
; autogenerated assertions expect a single vmovnt.i16 q0, q1 followed by a
; store of q0.
; NOTE(review): the _b suffix pairs with a vmovnt expectation here, whereas
; the 16-bit tests with the equivalent mask shape are named _t - the suffixes
; look swapped in the 8-bit group; confirm whether that is intentional.
408define arm_aapcs_vfpcc void @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
409; CHECK-LABEL: vmovn8_b1:
410; CHECK:       @ %bb.0: @ %entry
411; CHECK-NEXT:    vmovnt.i16 q0, q1
412; CHECK-NEXT:    vstrw.32 q0, [r0]
413; CHECK-NEXT:    bx lr
414entry:
415  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
416  store <16 x i8> %out, ptr %dest, align 8
417  ret void
418}
419
; 8-bit lanes with swapped operands: the even lanes of %src2 stay in the even
; result lanes and the even lanes of %src1 are placed in the odd result
; lanes. The autogenerated assertions expect a single vmovnt.i16 q1, q0
; followed by a store of q1.
; NOTE(review): as with the other 8-bit tests, the _b suffix pairs with a
; vmovnt expectation, unlike the 16-bit naming; confirm whether intentional.
420define arm_aapcs_vfpcc void @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
421; CHECK-LABEL: vmovn8_b2:
422; CHECK:       @ %bb.0: @ %entry
423; CHECK-NEXT:    vmovnt.i16 q1, q0
424; CHECK-NEXT:    vstrw.32 q1, [r0]
425; CHECK-NEXT:    bx lr
426entry:
427  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
428  store <16 x i8> %out, ptr %dest, align 8
429  ret void
430}
431
; 8-bit lanes: even result lanes come from the even lanes of %src1 and odd
; result lanes from the odd lanes of %src2, so no lane changes position. The
; autogenerated assertions expect a single vmovnb.i16 q1, q0 followed by a
; store of q1.
; NOTE(review): the _t suffix pairs with a vmovnb expectation here, the
; opposite of the 16-bit naming; confirm whether that is intentional.
432define arm_aapcs_vfpcc void @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
433; CHECK-LABEL: vmovn8_t1:
434; CHECK:       @ %bb.0: @ %entry
435; CHECK-NEXT:    vmovnb.i16 q1, q0
436; CHECK-NEXT:    vstrw.32 q1, [r0]
437; CHECK-NEXT:    bx lr
438entry:
439  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
440  store <16 x i8> %out, ptr %dest, align 8
441  ret void
442}
443
; 8-bit lanes: the odd lanes of %src2 move down into the even result lanes
; and the even lanes of %src1 move up into the odd result lanes. The
; autogenerated assertions show no VMOVN for this mask: both configurations
; expect a lane-by-lane rebuild through r1 into q2 (sixteen vmov.u8 / vmov.8
; pairs) followed by a store of q2.
444define arm_aapcs_vfpcc void @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
445; CHECK-LABEL: vmovn8_t2:
446; CHECK:       @ %bb.0: @ %entry
447; CHECK-NEXT:    vmov.u8 r1, q1[1]
448; CHECK-NEXT:    vmov.8 q2[0], r1
449; CHECK-NEXT:    vmov.u8 r1, q0[0]
450; CHECK-NEXT:    vmov.8 q2[1], r1
451; CHECK-NEXT:    vmov.u8 r1, q1[3]
452; CHECK-NEXT:    vmov.8 q2[2], r1
453; CHECK-NEXT:    vmov.u8 r1, q0[2]
454; CHECK-NEXT:    vmov.8 q2[3], r1
455; CHECK-NEXT:    vmov.u8 r1, q1[5]
456; CHECK-NEXT:    vmov.8 q2[4], r1
457; CHECK-NEXT:    vmov.u8 r1, q0[4]
458; CHECK-NEXT:    vmov.8 q2[5], r1
459; CHECK-NEXT:    vmov.u8 r1, q1[7]
460; CHECK-NEXT:    vmov.8 q2[6], r1
461; CHECK-NEXT:    vmov.u8 r1, q0[6]
462; CHECK-NEXT:    vmov.8 q2[7], r1
463; CHECK-NEXT:    vmov.u8 r1, q1[9]
464; CHECK-NEXT:    vmov.8 q2[8], r1
465; CHECK-NEXT:    vmov.u8 r1, q0[8]
466; CHECK-NEXT:    vmov.8 q2[9], r1
467; CHECK-NEXT:    vmov.u8 r1, q1[11]
468; CHECK-NEXT:    vmov.8 q2[10], r1
469; CHECK-NEXT:    vmov.u8 r1, q0[10]
470; CHECK-NEXT:    vmov.8 q2[11], r1
471; CHECK-NEXT:    vmov.u8 r1, q1[13]
472; CHECK-NEXT:    vmov.8 q2[12], r1
473; CHECK-NEXT:    vmov.u8 r1, q0[12]
474; CHECK-NEXT:    vmov.8 q2[13], r1
475; CHECK-NEXT:    vmov.u8 r1, q1[15]
476; CHECK-NEXT:    vmov.8 q2[14], r1
477; CHECK-NEXT:    vmov.u8 r1, q0[14]
478; CHECK-NEXT:    vmov.8 q2[15], r1
479; CHECK-NEXT:    vstrw.32 q2, [r0]
480; CHECK-NEXT:    bx lr
481entry:
482  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
483  store <16 x i8> %out, ptr %dest, align 8
484  ret void
485}
486
; 8-bit lanes: the odd lanes of %src1 move down into the even result lanes
; and the even lanes of %src2 move up into the odd result lanes. The
; autogenerated assertions show no VMOVN for this mask: both configurations
; expect a lane-by-lane rebuild through r1 into q2 (sixteen vmov.u8 / vmov.8
; pairs) followed by a store of q2.
487define arm_aapcs_vfpcc void @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
488; CHECK-LABEL: vmovn8_t3:
489; CHECK:       @ %bb.0: @ %entry
490; CHECK-NEXT:    vmov.u8 r1, q0[1]
491; CHECK-NEXT:    vmov.8 q2[0], r1
492; CHECK-NEXT:    vmov.u8 r1, q1[0]
493; CHECK-NEXT:    vmov.8 q2[1], r1
494; CHECK-NEXT:    vmov.u8 r1, q0[3]
495; CHECK-NEXT:    vmov.8 q2[2], r1
496; CHECK-NEXT:    vmov.u8 r1, q1[2]
497; CHECK-NEXT:    vmov.8 q2[3], r1
498; CHECK-NEXT:    vmov.u8 r1, q0[5]
499; CHECK-NEXT:    vmov.8 q2[4], r1
500; CHECK-NEXT:    vmov.u8 r1, q1[4]
501; CHECK-NEXT:    vmov.8 q2[5], r1
502; CHECK-NEXT:    vmov.u8 r1, q0[7]
503; CHECK-NEXT:    vmov.8 q2[6], r1
504; CHECK-NEXT:    vmov.u8 r1, q1[6]
505; CHECK-NEXT:    vmov.8 q2[7], r1
506; CHECK-NEXT:    vmov.u8 r1, q0[9]
507; CHECK-NEXT:    vmov.8 q2[8], r1
508; CHECK-NEXT:    vmov.u8 r1, q1[8]
509; CHECK-NEXT:    vmov.8 q2[9], r1
510; CHECK-NEXT:    vmov.u8 r1, q0[11]
511; CHECK-NEXT:    vmov.8 q2[10], r1
512; CHECK-NEXT:    vmov.u8 r1, q1[10]
513; CHECK-NEXT:    vmov.8 q2[11], r1
514; CHECK-NEXT:    vmov.u8 r1, q0[13]
515; CHECK-NEXT:    vmov.8 q2[12], r1
516; CHECK-NEXT:    vmov.u8 r1, q1[12]
517; CHECK-NEXT:    vmov.8 q2[13], r1
518; CHECK-NEXT:    vmov.u8 r1, q0[15]
519; CHECK-NEXT:    vmov.8 q2[14], r1
520; CHECK-NEXT:    vmov.u8 r1, q1[14]
521; CHECK-NEXT:    vmov.8 q2[15], r1
522; CHECK-NEXT:    vstrw.32 q2, [r0]
523; CHECK-NEXT:    bx lr
524entry:
525  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
526  store <16 x i8> %out, ptr %dest, align 8
527  ret void
528}
529
; 8-bit lanes: even result lanes come from the even lanes of %src2 and odd
; result lanes from the odd lanes of %src1, so no lane changes position. The
; autogenerated assertions expect a single vmovnb.i16 q0, q1 followed by a
; store of q0.
; NOTE(review): the _t suffix pairs with a vmovnb expectation here, the
; opposite of the 16-bit naming; confirm whether that is intentional.
530define arm_aapcs_vfpcc void @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
531; CHECK-LABEL: vmovn8_t4:
532; CHECK:       @ %bb.0: @ %entry
533; CHECK-NEXT:    vmovnb.i16 q0, q1
534; CHECK-NEXT:    vstrw.32 q0, [r0]
535; CHECK-NEXT:    bx lr
536entry:
537  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
538  store <16 x i8> %out, ptr %dest, align 8
539  ret void
540}
541