xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-shufflemov.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s

; i16
6
; Swaps the two 64-bit halves of %s1 (mask 4,5,6,7,0,1,2,3); %s2 is unused.
; Expected lowering: four vmov.f32 s-register moves into q1, then a copy to q0.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %out
}
20
; Reverses the order of the four 32-bit pairs of i16 lanes of %s1
; (mask 6,7,4,5,2,3,0,1); %s2 is unused.
; Expected lowering: s3..s0 moved into s4..s7, then q1 copied to q0.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_67452301(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_67452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %out
}
34
; Reverses all eight i16 lanes of %s1 (mask 7,6,5,4,3,2,1,0); %s2 is unused.
; Expected lowering: vrev64.16 into q1, then s-register moves that swap the
; two 64-bit halves back into q0.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_76543210(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_76543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    vmov.f32 s1, s7
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %out
}
48
; Identity shuffle of %s1 (mask 0..7); %s2 is unused.
; Expected lowering: no instructions other than the return.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_01234567(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %out
}
57
; Two-input shuffle: lanes 0-3 come from %s1, lanes 4-7 come from the upper
; half of %s2 (mask indices 12-15).
; Expected lowering: s6/s7 copied into s2/s3, leaving the low half of q0 intact.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0123cdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %out
}
68
; Only the odd result lanes are defined (7,5,3,1 of %s1); even lanes are undef.
; Expected lowering: a reversal of the four 32-bit s-registers (s3..s0 into
; s4..s7), then q1 copied to q0.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_u7u5u3u1(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_u7u5u3u1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
  ret <8 x i16> %out
}
82
; Only the even result lanes are defined (6,4,2,0 of %s1); odd lanes are undef.
; Expected lowering: a reversal of the four 32-bit s-registers (s3..s0 into
; s4..s7), then q1 copied to q0.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_6u4u2u0u(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_6u4u2u0u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
  ret <8 x i16> %out
}
96
; Only result lane 0 is defined, and it is lane 0 of %s1; all other lanes are
; undef. Expected lowering: no instructions other than the return.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0uuuuuuu(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0uuuuuuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %out
}
105
; Only result lane 4 is defined, taking lane 0 of %s1; all other lanes are undef.
; Expected lowering: extract lane 0 into r0 and broadcast it with vdup.16.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_uuuu0uuu(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_uuuu0uuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %out
}
116
117
; i8
119
; Reverses the order of the four 4-byte groups of %s1 while keeping the byte
; order inside each group (mask 12-15, 8-11, 4-7, 0-3); %s2 is unused.
; Expected lowering: s3..s0 moved into s4..s7, then q1 copied to q0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45670123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdef89ab45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %out
}
133
; Reverses the order of the eight 2-byte pairs of %s1 while keeping the byte
; order inside each pair (mask 14,15,12,13,...,0,1); %s2 is unused.
; The CHECK lines record an element-by-element lowering: q0 is copied to q1 and
; each of the 16 bytes is extracted with vmov.u8 and inserted with vmov.8.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_efcdab8967452301(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_efcdab8967452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[14]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <16 x i8> %out
}
175
; Reverses all sixteen i8 lanes of %s1 (mask 15..0); %s2 is unused.
; Expected lowering: vrev64.8 into q1, then s-register moves that swap the two
; 64-bit halves back into q0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_fedcba9876543210(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_fedcba9876543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.8 q1, q0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    vmov.f32 s1, s7
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %out
}
189
; Identity shuffle of %s1 (mask 0..15); %s2 is unused.
; Expected lowering: no instructions other than the return.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123456789abcdef(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_0123456789abcdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %out
}
198
; Two-input shuffle that interleaves 4-byte groups: bytes 0-3 of %s1, bytes 0-3
; of %s2 (indices 16-19), bytes 4-7 of %s1, bytes 4-7 of %s2 (indices 20-23).
; Expected lowering: s0, s4, s1, s5 moved into s8..s11, then q2 copied to q0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123ghij4567klmn(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_0123ghij4567klmn:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s8, s0
; CHECK-NEXT:    vmov.f32 s9, s4
; CHECK-NEXT:    vmov.f32 s10, s1
; CHECK-NEXT:    vmov.f32 s11, s5
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %out
}
212
; Reversal of the four 4-byte groups of %s1 with one undef byte in each group
; (mask 12,13,14,u, 8,9,u,11, 4,u,6,7, u,1,2,3); %s2 is unused.
; Expected lowering: s3..s0 moved into s4..s7, then q1 copied to q0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdeu89ub4u67u123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdeu89ub4u67u123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 undef, i32 8, i32 9, i32 undef, i32 11, i32 4, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 3>
  ret <16 x i8> %out
}
226
; Reversal of the four 4-byte groups of %s1 with two undef bytes in each group
; (mask 12,13,u,u, 8,u,u,11, u,u,6,7, u,1,2,u); %s2 is unused.
; Expected lowering: s3..s0 moved into s4..s7, then q1 copied to q0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cduu8uubuu67u12u(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cduu8uubuu67u12u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 undef>
  ret <16 x i8> %out
}
240
; Reversal of the four 4-byte groups of %s1 with only one defined byte in each
; group (mask 12,u,u,u, u,u,u,11, u,u,6,u, u,u,2,u); %s2 is unused.
; Expected lowering: s3..s0 moved into s4..s7, then q1 copied to q0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cuuuuuubuu6uuu2u(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cuuuuuubuu6uuu2u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 undef, i32 undef, i32 undef, i32 2, i32 undef>
  ret <16 x i8> %out
}
254
; Reversal of the four 4-byte groups of %s1, except that the third result group
; is 4,5,undef,0 (result byte 11 takes byte 0); %s2 is unused.
; Expected lowering: result bytes 8, 9 and 11 are inserted individually with
; vmov.u8/vmov.8, and the other three groups are moved with vmov.f32.
; NOTE(review): the function name spells 17 lane characters while the mask has
; 16 entries; the name is left untouched because CHECK-LABEL depends on it.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45u700123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdef89ab45u700123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[4]
; CHECK-NEXT:    vmov.8 q1[8], r0
; CHECK-NEXT:    vmov.u8 r0, q0[5]
; CHECK-NEXT:    vmov.8 q1[9], r0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vmov.8 q1[11], r0
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 undef, i32 0, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %out
}
273
274
275
; f16
277
; Swaps the two 64-bit halves of %s1 (mask 4,5,6,7,0,1,2,3); %s2 is unused.
; Expected lowering: four vmov.f32 s-register moves into q1, then a copy to q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x half> %out
}
291
; Reverses the order of the four 32-bit pairs of half lanes of %s1
; (mask 6,7,4,5,2,3,0,1); %s2 is unused.
; Expected lowering: s3..s0 moved into s4..s7, then q1 copied to q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_67452301(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_67452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x half> %out
}
305
; Reverses all eight half lanes of %s1 (mask 7,6,5,4,3,2,1,0); %s2 is unused.
; Expected lowering: vrev64.16 into q1, then s-register moves that swap the
; two 64-bit halves back into q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_76543210(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_76543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    vmov.f32 s1, s7
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %out
}
319
; Identity shuffle of %s1 (mask 0..7); %s2 is unused.
; Expected lowering: no instructions other than the return.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_01234567(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %out
}
328
; Two-input shuffle: lanes 0-3 come from %s1, lanes 4-7 come from the upper
; half of %s2 (mask indices 12-15).
; Expected lowering: s6/s7 copied into s2/s3, leaving the low half of q0 intact.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0123cdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x half> %out
}
339
; Only the odd result lanes are defined (7,5,3,1 of %s1); even lanes are undef.
; Expected lowering: a reversal of the four 32-bit s-registers (s3..s0 into
; s4..s7), then q1 copied to q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_u7u5u3u1(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_u7u5u3u1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
  ret <8 x half> %out
}
353
; Only the even result lanes are defined (6,4,2,0 of %s1); odd lanes are undef.
; Expected lowering: a reversal of the four 32-bit s-registers (s3..s0 into
; s4..s7), then q1 copied to q0.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_6u4u2u0u(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_6u4u2u0u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
  ret <8 x half> %out
}
367
; Only result lane 0 is defined, and it is lane 0 of %s1; all other lanes are
; undef. Expected lowering: no instructions other than the return.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0uuuuuuu(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0uuuuuuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %out
}
376
; Only result lane 4 is defined, taking lane 0 of %s1; all other lanes are undef.
; Expected lowering: extract lane 0 into r0 and broadcast it with vdup.16.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_uuuu0uuu(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_uuuu0uuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %out
}
387