xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vdup.ll (revision 8998ff53c91687b1065d095f6ac0ad7578131d73)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
4; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
5
; Broadcast a scalar i32 argument to all four lanes: %src is inserted into
; lane 0 and an all-zero shuffle mask replicates that lane.
; Little-endian runs expect one vdup.32 from r0 straight into q0; the
; big-endian run expects the vdup.32 into q1 followed by vrev64.32 into q0.
; NOTE(review): the trailing vrev64 presumably adapts the result to the
; big-endian vector return layout - confirm against the ARM backend.
6define arm_aapcs_vfpcc <4 x i32> @vdup_i32(i32 %src) {
7; CHECK-LE-LABEL: vdup_i32:
8; CHECK-LE:       @ %bb.0: @ %entry
9; CHECK-LE-NEXT:    vdup.32 q0, r0
10; CHECK-LE-NEXT:    bx lr
11;
12; CHECK-BE-LABEL: vdup_i32:
13; CHECK-BE:       @ %bb.0: @ %entry
14; CHECK-BE-NEXT:    vdup.32 q1, r0
15; CHECK-BE-NEXT:    vrev64.32 q0, q1
16; CHECK-BE-NEXT:    bx lr
17entry:
18  %0 = insertelement <4 x i32> undef, i32 %src, i32 0
19  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
20  ret <4 x i32> %out
21}
22
; Broadcast a scalar i16 argument to all eight lanes (insert into lane 0,
; then shuffle with an all-zero mask).
; Little-endian runs expect a single vdup.16 from r0 into q0; the big-endian
; run expects vdup.16 into q1 followed by vrev64.16 into q0.
23define arm_aapcs_vfpcc <8 x i16> @vdup_i16(i16 %src) {
24; CHECK-LE-LABEL: vdup_i16:
25; CHECK-LE:       @ %bb.0: @ %entry
26; CHECK-LE-NEXT:    vdup.16 q0, r0
27; CHECK-LE-NEXT:    bx lr
28;
29; CHECK-BE-LABEL: vdup_i16:
30; CHECK-BE:       @ %bb.0: @ %entry
31; CHECK-BE-NEXT:    vdup.16 q1, r0
32; CHECK-BE-NEXT:    vrev64.16 q0, q1
33; CHECK-BE-NEXT:    bx lr
34entry:
35  %0 = insertelement <8 x i16> undef, i16 %src, i32 0
36  %out = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
37  ret <8 x i16> %out
38}
39
; Broadcast a scalar i8 argument to all sixteen lanes (insert into lane 0,
; then shuffle with an all-zero mask).
; Little-endian runs expect a single vdup.8 from r0 into q0; the big-endian
; run expects vdup.8 into q1 followed by vrev64.8 into q0.
40define arm_aapcs_vfpcc <16 x i8> @vdup_i8(i8 %src) {
41; CHECK-LE-LABEL: vdup_i8:
42; CHECK-LE:       @ %bb.0: @ %entry
43; CHECK-LE-NEXT:    vdup.8 q0, r0
44; CHECK-LE-NEXT:    bx lr
45;
46; CHECK-BE-LABEL: vdup_i8:
47; CHECK-BE:       @ %bb.0: @ %entry
48; CHECK-BE-NEXT:    vdup.8 q1, r0
49; CHECK-BE-NEXT:    vrev64.8 q0, q1
50; CHECK-BE-NEXT:    bx lr
51entry:
52  %0 = insertelement <16 x i8> undef, i8 %src, i32 0
53  %out = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
54  ret <16 x i8> %out
55}
56
; Broadcast a scalar i64 argument to both lanes of a <2 x i64>.
; No vdup is expected here: the checks instead expect two two-lane vmov
; instructions, writing r0 to lanes 0 and 2 and r1 to lanes 1 and 3.
; Little-endian runs build the value directly in q0; the big-endian run
; builds it in q1 and then expects vrev64.32 into q0.
57define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
58; CHECK-LE-LABEL: vdup_i64:
59; CHECK-LE:       @ %bb.0: @ %entry
60; CHECK-LE-NEXT:    vmov q0[2], q0[0], r0, r0
61; CHECK-LE-NEXT:    vmov q0[3], q0[1], r1, r1
62; CHECK-LE-NEXT:    bx lr
63;
64; CHECK-BE-LABEL: vdup_i64:
65; CHECK-BE:       @ %bb.0: @ %entry
66; CHECK-BE-NEXT:    vmov q1[2], q1[0], r0, r0
67; CHECK-BE-NEXT:    vmov q1[3], q1[1], r1, r1
68; CHECK-BE-NEXT:    vrev64.32 q0, q1
69; CHECK-BE-NEXT:    bx lr
70entry:
71  %0 = insertelement <2 x i64> undef, i64 %src, i32 0
72  %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
73  ret <2 x i64> %out
74}
75
; Broadcast a scalar float argument to all four lanes of a <4 x float>.
; The checks expect the value to be moved from s0 into r0 first and then
; duplicated with vdup.32 (into q0 for little-endian; into q1 followed by
; vrev64.32 into q0 for big-endian).
76define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
77; CHECK-LE-LABEL: vdup_f32_1:
78; CHECK-LE:       @ %bb.0: @ %entry
79; CHECK-LE-NEXT:    vmov r0, s0
80; CHECK-LE-NEXT:    vdup.32 q0, r0
81; CHECK-LE-NEXT:    bx lr
82;
83; CHECK-BE-LABEL: vdup_f32_1:
84; CHECK-BE:       @ %bb.0: @ %entry
85; CHECK-BE-NEXT:    vmov r0, s0
86; CHECK-BE-NEXT:    vdup.32 q1, r0
87; CHECK-BE-NEXT:    vrev64.32 q0, q1
88; CHECK-BE-NEXT:    bx lr
89entry:
90  %0 = insertelement <4 x float> undef, float %src, i32 0
91  %out = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
92  ret <4 x float> %out
93}
94
; Broadcast a computed float (the fadd of the two arguments) to all four
; lanes, so the splatted value is produced by an FP instruction rather than
; arriving directly as an argument.
; The checks expect vadd.f32 into s0, a vmov of s0 to r0, and then vdup.32
; (plus vrev64.32 into q0 for the big-endian run).
95define arm_aapcs_vfpcc <4 x float> @vdup_f32_2(float %src1, float %src2) {
96; CHECK-LE-LABEL: vdup_f32_2:
97; CHECK-LE:       @ %bb.0: @ %entry
98; CHECK-LE-NEXT:    vadd.f32 s0, s0, s1
99; CHECK-LE-NEXT:    vmov r0, s0
100; CHECK-LE-NEXT:    vdup.32 q0, r0
101; CHECK-LE-NEXT:    bx lr
102;
103; CHECK-BE-LABEL: vdup_f32_2:
104; CHECK-BE:       @ %bb.0: @ %entry
105; CHECK-BE-NEXT:    vadd.f32 s0, s0, s1
106; CHECK-BE-NEXT:    vmov r0, s0
107; CHECK-BE-NEXT:    vdup.32 q1, r0
108; CHECK-BE-NEXT:    vrev64.32 q0, q1
109; CHECK-BE-NEXT:    bx lr
110entry:
111  %0 = fadd float %src1, %src2
112  %1 = insertelement <4 x float> undef, float %0, i32 0
113  %out = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
114  ret <4 x float> %out
115}
116
; Float splat expressed through integer types: the float argument is bitcast
; to i32, splatted as <4 x i32>, and the vector is bitcast back to
; <4 x float>.
; The checks expect a vmov of s0 to r0 followed by vdup.32 (plus vrev64.32
; into q0 for the big-endian run).
117define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) {
118; CHECK-LE-LABEL: vdup_f32_1bc:
119; CHECK-LE:       @ %bb.0: @ %entry
120; CHECK-LE-NEXT:    vmov r0, s0
121; CHECK-LE-NEXT:    vdup.32 q0, r0
122; CHECK-LE-NEXT:    bx lr
123;
124; CHECK-BE-LABEL: vdup_f32_1bc:
125; CHECK-BE:       @ %bb.0: @ %entry
126; CHECK-BE-NEXT:    vmov r0, s0
127; CHECK-BE-NEXT:    vdup.32 q1, r0
128; CHECK-BE-NEXT:    vrev64.32 q0, q1
129; CHECK-BE-NEXT:    bx lr
130entry:
131  %srcbc = bitcast float %src to i32
132  %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
133  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
134  %outbc = bitcast <4 x i32> %out to <4 x float>
135  ret <4 x float> %outbc
136}
137
; Splat of a computed float routed through integer types: the fadd result is
; bitcast to i32, splatted as <4 x i32>, and the vector is bitcast back to
; <4 x float>.
; The checks expect vadd.f32 into s0, a vmov of s0 to r0, and then vdup.32
; (plus vrev64.32 into q0 for the big-endian run).
138define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) {
139; CHECK-LE-LABEL: vdup_f32_2bc:
140; CHECK-LE:       @ %bb.0: @ %entry
141; CHECK-LE-NEXT:    vadd.f32 s0, s0, s1
142; CHECK-LE-NEXT:    vmov r0, s0
143; CHECK-LE-NEXT:    vdup.32 q0, r0
144; CHECK-LE-NEXT:    bx lr
145;
146; CHECK-BE-LABEL: vdup_f32_2bc:
147; CHECK-BE:       @ %bb.0: @ %entry
148; CHECK-BE-NEXT:    vadd.f32 s0, s0, s1
149; CHECK-BE-NEXT:    vmov r0, s0
150; CHECK-BE-NEXT:    vdup.32 q1, r0
151; CHECK-BE-NEXT:    vrev64.32 q0, q1
152; CHECK-BE-NEXT:    bx lr
153entry:
154  %0 = fadd float %src1, %src2
155  %bc = bitcast float %0 to i32
156  %1 = insertelement <4 x i32> undef, i32 %bc, i32 0
157  %out = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
158  %outbc = bitcast <4 x i32> %out to <4 x float>
159  ret <4 x float> %outbc
160}
161
; Broadcast a computed half (the fadd of the two arguments) to all eight
; lanes of a <8 x half>.
; The checks expect vadd.f16 into s0, a vmov.f16 of s0 to r0, and then
; vdup.16 (plus vrev64.16 into q0 for the big-endian run).
162define arm_aapcs_vfpcc <8 x half> @vdup_f16(half %0, half %1) {
163; CHECK-LE-LABEL: vdup_f16:
164; CHECK-LE:       @ %bb.0: @ %entry
165; CHECK-LE-NEXT:    vadd.f16 s0, s0, s1
166; CHECK-LE-NEXT:    vmov.f16 r0, s0
167; CHECK-LE-NEXT:    vdup.16 q0, r0
168; CHECK-LE-NEXT:    bx lr
169;
170; CHECK-BE-LABEL: vdup_f16:
171; CHECK-BE:       @ %bb.0: @ %entry
172; CHECK-BE-NEXT:    vadd.f16 s0, s0, s1
173; CHECK-BE-NEXT:    vmov.f16 r0, s0
174; CHECK-BE-NEXT:    vdup.16 q1, r0
175; CHECK-BE-NEXT:    vrev64.16 q0, q1
176; CHECK-BE-NEXT:    bx lr
177entry:
178  %2 = fadd half %0, %1
179  %3 = insertelement <8 x half> undef, half %2, i32 0
180  %out = shufflevector <8 x half> %3, <8 x half> undef, <8 x i32> zeroinitializer
181  ret <8 x half> %out
182}
183
; Splat of a computed half routed through integer types: the fadd result is
; bitcast to i16, splatted as <8 x i16>, and the vector is bitcast back to
; <8 x half>.
; The checks expect vadd.f16 into s0, a vmov.f16 of s0 to r0, and then
; vdup.16 (plus vrev64.16 into q0 for the big-endian run).
184define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half %0, half %1) {
185; CHECK-LE-LABEL: vdup_f16_bc:
186; CHECK-LE:       @ %bb.0: @ %entry
187; CHECK-LE-NEXT:    vadd.f16 s0, s0, s1
188; CHECK-LE-NEXT:    vmov.f16 r0, s0
189; CHECK-LE-NEXT:    vdup.16 q0, r0
190; CHECK-LE-NEXT:    bx lr
191;
192; CHECK-BE-LABEL: vdup_f16_bc:
193; CHECK-BE:       @ %bb.0: @ %entry
194; CHECK-BE-NEXT:    vadd.f16 s0, s0, s1
195; CHECK-BE-NEXT:    vmov.f16 r0, s0
196; CHECK-BE-NEXT:    vdup.16 q1, r0
197; CHECK-BE-NEXT:    vrev64.16 q0, q1
198; CHECK-BE-NEXT:    bx lr
199entry:
200  %2 = fadd half %0, %1
201  %bc = bitcast half %2 to i16
202  %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
203  %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
204  %outbc = bitcast <8 x i16> %out to <8 x half>
205  ret <8 x half> %outbc
206}
207
; Broadcast a scalar double argument to both lanes of a <2 x double>.
; The expectations use the check prefix shared by all three llc invocations,
; so the same output is required for little- and big-endian: two vmov.f32
; copies (s0 to s2 and s1 to s3) and no vdup or vrev.
208define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
209; CHECK-LABEL: vdup_f64:
210; CHECK:       @ %bb.0: @ %entry
211; CHECK-NEXT:    vmov.f32 s2, s0
212; CHECK-NEXT:    vmov.f32 s3, s1
213; CHECK-NEXT:    bx lr
214entry:
215  %0 = insertelement <2 x double> undef, double %src, i32 0
216  %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
217  ret <2 x double> %out
218}
219
220
221
; Broadcast lane 3 of a <4 x i32> vector argument to every lane.
; Little-endian runs expect the lane to be read via s3 into r0 and then
; duplicated with vdup.32. The big-endian run expects the input to be
; reversed with vrev64.32 into q1 first, the lane read via s7, and the
; vdup.32 result reversed again with vrev64.32 into q0.
; NOTE(review): the vrev64 pair presumably converts between the big-endian
; argument/return layout and lane order - confirm against the ARM backend.
222define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
223; CHECK-LE-LABEL: vduplane_i32:
224; CHECK-LE:       @ %bb.0: @ %entry
225; CHECK-LE-NEXT:    vmov r0, s3
226; CHECK-LE-NEXT:    vdup.32 q0, r0
227; CHECK-LE-NEXT:    bx lr
228;
229; CHECK-BE-LABEL: vduplane_i32:
230; CHECK-BE:       @ %bb.0: @ %entry
231; CHECK-BE-NEXT:    vrev64.32 q1, q0
232; CHECK-BE-NEXT:    vmov r0, s7
233; CHECK-BE-NEXT:    vdup.32 q1, r0
234; CHECK-BE-NEXT:    vrev64.32 q0, q1
235; CHECK-BE-NEXT:    bx lr
236entry:
237  %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
238  ret <4 x i32> %out
239}
240
; Broadcast lane 3 of a <8 x i16> vector argument to every lane.
; Little-endian runs expect vmov.u16 of q0[3] into r0 followed by vdup.16.
; The big-endian run expects vrev64.16 into q1 first, the lane read from
; q1[3], and the vdup.16 result reversed again with vrev64.16 into q0.
241define arm_aapcs_vfpcc <8 x i16> @vduplane_i16(<8 x i16> %src) {
242; CHECK-LE-LABEL: vduplane_i16:
243; CHECK-LE:       @ %bb.0: @ %entry
244; CHECK-LE-NEXT:    vmov.u16 r0, q0[3]
245; CHECK-LE-NEXT:    vdup.16 q0, r0
246; CHECK-LE-NEXT:    bx lr
247;
248; CHECK-BE-LABEL: vduplane_i16:
249; CHECK-BE:       @ %bb.0: @ %entry
250; CHECK-BE-NEXT:    vrev64.16 q1, q0
251; CHECK-BE-NEXT:    vmov.u16 r0, q1[3]
252; CHECK-BE-NEXT:    vdup.16 q1, r0
253; CHECK-BE-NEXT:    vrev64.16 q0, q1
254; CHECK-BE-NEXT:    bx lr
255entry:
256  %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
257  ret <8 x i16> %out
258}
259
; Broadcast lane 3 of a <16 x i8> vector argument to every lane.
; Little-endian runs expect vmov.u8 of q0[3] into r0 followed by vdup.8.
; The big-endian run expects vrev64.8 into q1 first, the lane read from
; q1[3], and the vdup.8 result reversed again with vrev64.8 into q0.
260define arm_aapcs_vfpcc <16 x i8> @vduplane_i8(<16 x i8> %src) {
261; CHECK-LE-LABEL: vduplane_i8:
262; CHECK-LE:       @ %bb.0: @ %entry
263; CHECK-LE-NEXT:    vmov.u8 r0, q0[3]
264; CHECK-LE-NEXT:    vdup.8 q0, r0
265; CHECK-LE-NEXT:    bx lr
266;
267; CHECK-BE-LABEL: vduplane_i8:
268; CHECK-BE:       @ %bb.0: @ %entry
269; CHECK-BE-NEXT:    vrev64.8 q1, q0
270; CHECK-BE-NEXT:    vmov.u8 r0, q1[3]
271; CHECK-BE-NEXT:    vdup.8 q1, r0
272; CHECK-BE-NEXT:    vrev64.8 q0, q1
273; CHECK-BE-NEXT:    bx lr
274entry:
275  %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
276  ret <16 x i8> %out
277}
278
; Broadcast lane 1 of a <2 x i64> vector argument to both lanes.
; The expectations use the check prefix shared by all three llc invocations:
; two vmov.f32 copies (s2 to s0 and s3 to s1), with no vdup or vrev in either
; endianness.
279define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
280; CHECK-LABEL: vduplane_i64:
281; CHECK:       @ %bb.0: @ %entry
282; CHECK-NEXT:    vmov.f32 s0, s2
283; CHECK-NEXT:    vmov.f32 s1, s3
284; CHECK-NEXT:    bx lr
285entry:
286  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
287  ret <2 x i64> %out
288}
289
; Broadcast lane 3 of a <4 x float> vector argument to every lane.
; Little-endian runs expect the lane to be read via s3 into r0 and then
; duplicated with vdup.32. The big-endian run expects vrev64.32 into q1
; first, the lane read via s7, and the vdup.32 result reversed again with
; vrev64.32 into q0.
290define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
291; CHECK-LE-LABEL: vduplane_f32:
292; CHECK-LE:       @ %bb.0: @ %entry
293; CHECK-LE-NEXT:    vmov r0, s3
294; CHECK-LE-NEXT:    vdup.32 q0, r0
295; CHECK-LE-NEXT:    bx lr
296;
297; CHECK-BE-LABEL: vduplane_f32:
298; CHECK-BE:       @ %bb.0: @ %entry
299; CHECK-BE-NEXT:    vrev64.32 q1, q0
300; CHECK-BE-NEXT:    vmov r0, s7
301; CHECK-BE-NEXT:    vdup.32 q1, r0
302; CHECK-BE-NEXT:    vrev64.32 q0, q1
303; CHECK-BE-NEXT:    bx lr
304entry:
305  %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
306  ret <4 x float> %out
307}
308
; Broadcast lane 3 of a <8 x half> vector argument to every lane.
; Little-endian runs expect vmov.u16 of q0[3] into r0 followed by vdup.16.
; The big-endian run expects vrev64.16 into q1 first, the lane read from
; q1[3], and the vdup.16 result reversed again with vrev64.16 into q0.
309define arm_aapcs_vfpcc <8 x half> @vduplane_f16(<8 x half> %src) {
310; CHECK-LE-LABEL: vduplane_f16:
311; CHECK-LE:       @ %bb.0: @ %entry
312; CHECK-LE-NEXT:    vmov.u16 r0, q0[3]
313; CHECK-LE-NEXT:    vdup.16 q0, r0
314; CHECK-LE-NEXT:    bx lr
315;
316; CHECK-BE-LABEL: vduplane_f16:
317; CHECK-BE:       @ %bb.0: @ %entry
318; CHECK-BE-NEXT:    vrev64.16 q1, q0
319; CHECK-BE-NEXT:    vmov.u16 r0, q1[3]
320; CHECK-BE-NEXT:    vdup.16 q1, r0
321; CHECK-BE-NEXT:    vrev64.16 q0, q1
322; CHECK-BE-NEXT:    bx lr
323entry:
324  %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
325  ret <8 x half> %out
326}
327
; Broadcast lane 1 of a <2 x double> vector argument to both lanes.
; The expectations use the check prefix shared by all three llc invocations:
; two vmov.f32 copies (s2 to s0 and s3 to s1), with no vdup or vrev in either
; endianness.
328define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
329; CHECK-LABEL: vduplane_f64:
330; CHECK:       @ %bb.0: @ %entry
331; CHECK-NEXT:    vmov.f32 s0, s2
332; CHECK-NEXT:    vmov.f32 s1, s3
333; CHECK-NEXT:    bx lr
334entry:
335  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
336  ret <2 x double> %out
337}
338
339
; Splat followed by an extract of one splatted lane: the float argument is
; bitcast to i32, splatted as <4 x i32>, bitcast back to <4 x float>, and
; lane 2 is extracted and returned.
; The checks, shared by all three llc invocations, expect the function body
; to be nothing but the return, i.e. the whole splat/extract chain folds
; back to the incoming value.
340define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
341; CHECK-LABEL: vdup_f32_extract:
342; CHECK:       @ %bb.0: @ %entry
343; CHECK-NEXT:    bx lr
344entry:
345  %srcbc = bitcast float %src to i32
346  %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
347  %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
348  %outbc = bitcast <4 x i32> %out to <4 x float>
349  %ext = extractelement <4 x float> %outbc, i32 2
350  ret float %ext
351}
352
; Splat followed by an extract of one splatted lane, for half: the fadd
; result is bitcast to i16, splatted as <8 x i16>, bitcast back to
; <8 x half>, and lane 2 is extracted and returned.
; The checks, shared by all three llc invocations, expect only the vadd.f16
; and the return, i.e. no vector instructions survive.
353define arm_aapcs_vfpcc half @vdup_f16_extract(half %0, half %1) {
354; CHECK-LABEL: vdup_f16_extract:
355; CHECK:       @ %bb.0: @ %entry
356; CHECK-NEXT:    vadd.f16 s0, s0, s1
357; CHECK-NEXT:    bx lr
358entry:
359  %2 = fadd half %0, %1
360  %bc = bitcast half %2 to i16
361  %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
362  %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
363  %outbc = bitcast <8 x i16> %out to <8 x half>
364  %ext = extractelement <8 x half> %outbc, i32 2
365  ret half %ext
366}
367
368
; Splat of element 0 of a <4 x i16> obtained by bitcasting a scalar i64; the
; all-zero shuffle mask also widens the result to eight lanes.
; The checks expect the i64 to go through memory: an 8-byte stack slot is
; reserved, r0/r1 are stored with strd, the slot is reloaded with
; vldrh.u32, element 0 is read via s0 into r0 and duplicated with vdup.16.
; The big-endian run additionally expects vrev64.16 into q0.
; This function has no named entry block, so the block-header check has no
; trailing block-name comment.
369define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
370; CHECK-LE-LABEL: bitcast_i64_v8i16:
371; CHECK-LE:       @ %bb.0:
372; CHECK-LE-NEXT:    .pad #8
373; CHECK-LE-NEXT:    sub sp, #8
374; CHECK-LE-NEXT:    strd r0, r1, [sp]
375; CHECK-LE-NEXT:    mov r0, sp
376; CHECK-LE-NEXT:    vldrh.u32 q0, [r0]
377; CHECK-LE-NEXT:    vmov r0, s0
378; CHECK-LE-NEXT:    vdup.16 q0, r0
379; CHECK-LE-NEXT:    add sp, #8
380; CHECK-LE-NEXT:    bx lr
381;
382; CHECK-BE-LABEL: bitcast_i64_v8i16:
383; CHECK-BE:       @ %bb.0:
384; CHECK-BE-NEXT:    .pad #8
385; CHECK-BE-NEXT:    sub sp, #8
386; CHECK-BE-NEXT:    strd r0, r1, [sp]
387; CHECK-BE-NEXT:    mov r0, sp
388; CHECK-BE-NEXT:    vldrh.u32 q0, [r0]
389; CHECK-BE-NEXT:    vmov r0, s0
390; CHECK-BE-NEXT:    vdup.16 q1, r0
391; CHECK-BE-NEXT:    vrev64.16 q0, q1
392; CHECK-BE-NEXT:    add sp, #8
393; CHECK-BE-NEXT:    bx lr
394  %b = bitcast i64 %a to <4 x i16>
395  %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
396  ret <8 x i16> %r
397}
398
; Splat of lane 0 of a <8 x i16> obtained by bitcasting a scalar i128.
; Little-endian runs expect a single vdup.16 directly from r0. The
; big-endian run expects r0 to be inserted into q0 with vmov.32, the
; halfwords swapped with vrev32.16, lane 0 read back with vmov.u16, and then
; vdup.16 into q1 followed by vrev64.16 into q0.
399define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
400; CHECK-LE-LABEL: bitcast_i128_v8i16:
401; CHECK-LE:       @ %bb.0:
402; CHECK-LE-NEXT:    vdup.16 q0, r0
403; CHECK-LE-NEXT:    bx lr
404;
405; CHECK-BE-LABEL: bitcast_i128_v8i16:
406; CHECK-BE:       @ %bb.0:
407; CHECK-BE-NEXT:    vmov.32 q0[0], r0
408; CHECK-BE-NEXT:    vrev32.16 q0, q0
409; CHECK-BE-NEXT:    vmov.u16 r0, q0[0]
410; CHECK-BE-NEXT:    vdup.16 q1, r0
411; CHECK-BE-NEXT:    vrev64.16 q0, q1
412; CHECK-BE-NEXT:    bx lr
413  %b = bitcast i128 %a to <8 x i16>
414  %r = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
415  ret <8 x i16> %r
416}
417
; Splat of element 1 (rather than element 0) of a <4 x i16> obtained by
; bitcasting a scalar i64, widened to eight lanes by the shuffle mask.
; The checks expect the i64 to be stored to an 8-byte stack slot with strd
; and reloaded with vldrh.u32; the selected element is then read via s1 into
; r0 and duplicated with vdup.16. The big-endian run additionally expects
; vrev64.16 into q0.
418define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
419; CHECK-LE-LABEL: bitcast_i64_v8i16_lane1:
420; CHECK-LE:       @ %bb.0:
421; CHECK-LE-NEXT:    .pad #8
422; CHECK-LE-NEXT:    sub sp, #8
423; CHECK-LE-NEXT:    strd r0, r1, [sp]
424; CHECK-LE-NEXT:    mov r0, sp
425; CHECK-LE-NEXT:    vldrh.u32 q0, [r0]
426; CHECK-LE-NEXT:    vmov r0, s1
427; CHECK-LE-NEXT:    vdup.16 q0, r0
428; CHECK-LE-NEXT:    add sp, #8
429; CHECK-LE-NEXT:    bx lr
430;
431; CHECK-BE-LABEL: bitcast_i64_v8i16_lane1:
432; CHECK-BE:       @ %bb.0:
433; CHECK-BE-NEXT:    .pad #8
434; CHECK-BE-NEXT:    sub sp, #8
435; CHECK-BE-NEXT:    strd r0, r1, [sp]
436; CHECK-BE-NEXT:    mov r0, sp
437; CHECK-BE-NEXT:    vldrh.u32 q0, [r0]
438; CHECK-BE-NEXT:    vmov r0, s1
439; CHECK-BE-NEXT:    vdup.16 q1, r0
440; CHECK-BE-NEXT:    vrev64.16 q0, q1
441; CHECK-BE-NEXT:    add sp, #8
442; CHECK-BE-NEXT:    bx lr
443  %b = bitcast i64 %a to <4 x i16>
444  %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
445  ret <8 x i16> %r
446}
447
; Splat of element 0 of a <4 x i16> obtained by bitcasting a scalar double,
; widened to eight lanes by the shuffle mask.
; Little-endian runs expect vmov.u16 of q0[0] into r0 followed by vdup.16,
; with no stack traffic. The big-endian run expects vrev64.16 into q1 first,
; the lane read from q1[0], and the vdup.16 result reversed again with
; vrev64.16 into q0.
448define arm_aapcs_vfpcc <8 x i16> @bitcast_f64_v8i16(double %a) {
449; CHECK-LE-LABEL: bitcast_f64_v8i16:
450; CHECK-LE:       @ %bb.0:
451; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
452; CHECK-LE-NEXT:    vdup.16 q0, r0
453; CHECK-LE-NEXT:    bx lr
454;
455; CHECK-BE-LABEL: bitcast_f64_v8i16:
456; CHECK-BE:       @ %bb.0:
457; CHECK-BE-NEXT:    vrev64.16 q1, q0
458; CHECK-BE-NEXT:    vmov.u16 r0, q1[0]
459; CHECK-BE-NEXT:    vdup.16 q1, r0
460; CHECK-BE-NEXT:    vrev64.16 q0, q1
461; CHECK-BE-NEXT:    bx lr
462  %b = bitcast double %a to <4 x i16>
463  %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
464  ret <8 x i16> %r
465}
466
; Splat of element 0 of a <4 x half> obtained by bitcasting a scalar i64,
; widened to eight lanes by the shuffle mask.
; Little-endian runs expect r0 to be inserted into q0 with vmov.32, lane 0
; read back with vmov.u16, and then vdup.16. The big-endian run additionally
; expects vrev32.16 after the insert and vrev64.16 into q0 after the
; vdup.16.
467define arm_aapcs_vfpcc <8 x half> @bitcast_i64_v8f16(i64 %a) {
468; CHECK-LE-LABEL: bitcast_i64_v8f16:
469; CHECK-LE:       @ %bb.0:
470; CHECK-LE-NEXT:    vmov.32 q0[0], r0
471; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
472; CHECK-LE-NEXT:    vdup.16 q0, r0
473; CHECK-LE-NEXT:    bx lr
474;
475; CHECK-BE-LABEL: bitcast_i64_v8f16:
476; CHECK-BE:       @ %bb.0:
477; CHECK-BE-NEXT:    vmov.32 q0[0], r0
478; CHECK-BE-NEXT:    vrev32.16 q0, q0
479; CHECK-BE-NEXT:    vmov.u16 r0, q0[0]
480; CHECK-BE-NEXT:    vdup.16 q1, r0
481; CHECK-BE-NEXT:    vrev64.16 q0, q1
482; CHECK-BE-NEXT:    bx lr
483  %b = bitcast i64 %a to <4 x half>
484  %r = shufflevector <4 x half> %b, <4 x half> poison, <8 x i32> zeroinitializer
485  ret <8 x half> %r
486}
487
; Splat of the single element of a <1 x i64> (a bitcast scalar i64) to both
; lanes of a <2 x i64>.
; The checks expect two two-lane vmov instructions, writing r0 to lanes 0
; and 2 and r1 to lanes 1 and 3; the big-endian run builds the value in q1
; and then expects vrev64.32 into q0.
; NOTE(review): the function name says v2f64, but every type in the body and
; the return type are i64-based - confirm whether the name is intentional.
488define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_v2f64(i64 %a) {
489; CHECK-LE-LABEL: bitcast_i64_v2f64:
490; CHECK-LE:       @ %bb.0:
491; CHECK-LE-NEXT:    vmov q0[2], q0[0], r0, r0
492; CHECK-LE-NEXT:    vmov q0[3], q0[1], r1, r1
493; CHECK-LE-NEXT:    bx lr
494;
495; CHECK-BE-LABEL: bitcast_i64_v2f64:
496; CHECK-BE:       @ %bb.0:
497; CHECK-BE-NEXT:    vmov q1[2], q1[0], r0, r0
498; CHECK-BE-NEXT:    vmov q1[3], q1[1], r1, r1
499; CHECK-BE-NEXT:    vrev64.32 q0, q1
500; CHECK-BE-NEXT:    bx lr
501  %b = bitcast i64 %a to <1 x i64>
502  %r = shufflevector <1 x i64> %b, <1 x i64> poison, <2 x i32> zeroinitializer
503  ret <2 x i64> %r
504}
505
; Splat of lane 0 of a <2 x i64> obtained by bitcasting a <2 x double>
; vector argument.
; The expectations use the check prefix shared by all three llc invocations:
; two vmov.f32 copies (s0 to s2 and s1 to s3), with no vdup or vrev in either
; endianness.
506define arm_aapcs_vfpcc <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) {
507; CHECK-LABEL: bitcast_v2f64_v2i64:
508; CHECK:       @ %bb.0:
509; CHECK-NEXT:    vmov.f32 s2, s0
510; CHECK-NEXT:    vmov.f32 s3, s1
511; CHECK-NEXT:    bx lr
512  %b = bitcast <2 x double> %a to <2 x i64>
513  %r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
514  ret <2 x i64> %r
515}
516
; Splat of lane 0 of a <2 x i64> obtained by bitcasting a <8 x i16> vector
; argument.
; The expectations use the check prefix shared by all three llc invocations:
; two vmov.f32 copies (s0 to s2 and s1 to s3), with no vdup or vrev in either
; endianness.
517define arm_aapcs_vfpcc <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a) {
518; CHECK-LABEL: bitcast_v8i16_v2i64:
519; CHECK:       @ %bb.0:
520; CHECK-NEXT:    vmov.f32 s2, s0
521; CHECK-NEXT:    vmov.f32 s3, s1
522; CHECK-NEXT:    bx lr
523  %b = bitcast <8 x i16> %a to <2 x i64>
524  %r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
525  ret <2 x i64> %r
526}
527
; Splat of lane 0 of a <8 x i16> obtained by bitcasting a 128-bit vector
; argument.
; Little-endian runs expect vmov.u16 of q0[0] into r0 followed by vdup.16.
; The big-endian run expects vrev64.16 into q1 first, the lane read from
; q1[0], and the vdup.16 result reversed again with vrev64.16 into q0.
; NOTE(review): the function name says v2f64, but the parameter is declared
; as <2 x i64> - confirm whether the name or the type is the intended one.
528define arm_aapcs_vfpcc <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
529; CHECK-LE-LABEL: bitcast_v2f64_v8i16:
530; CHECK-LE:       @ %bb.0:
531; CHECK-LE-NEXT:    vmov.u16 r0, q0[0]
532; CHECK-LE-NEXT:    vdup.16 q0, r0
533; CHECK-LE-NEXT:    bx lr
534;
535; CHECK-BE-LABEL: bitcast_v2f64_v8i16:
536; CHECK-BE:       @ %bb.0:
537; CHECK-BE-NEXT:    vrev64.16 q1, q0
538; CHECK-BE-NEXT:    vmov.u16 r0, q1[0]
539; CHECK-BE-NEXT:    vdup.16 q1, r0
540; CHECK-BE-NEXT:    vrev64.16 q0, q1
541; CHECK-BE-NEXT:    bx lr
542  %b = bitcast <2 x i64> %a to <8 x i16>
543  %r = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
544  ret <8 x i16> %r
545}
546
; Splat of element 0 of a <2 x i16> obtained by bitcasting a scalar i32,
; widened to eight lanes by the shuffle mask.
; Little-endian runs expect a single vdup.16 directly from r0. The
; big-endian run expects r0 to be inserted into q0 with vmov.32, the
; halfwords swapped with vrev32.16, lane 0 read back with vmov.u16, and then
; vdup.16 into q1 followed by vrev64.16 into q0.
547define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
548; CHECK-LE-LABEL: other_max_case:
549; CHECK-LE:       @ %bb.0:
550; CHECK-LE-NEXT:    vdup.16 q0, r0
551; CHECK-LE-NEXT:    bx lr
552;
553; CHECK-BE-LABEL: other_max_case:
554; CHECK-BE:       @ %bb.0:
555; CHECK-BE-NEXT:    vmov.32 q0[0], r0
556; CHECK-BE-NEXT:    vrev32.16 q0, q0
557; CHECK-BE-NEXT:    vmov.u16 r0, q0[0]
558; CHECK-BE-NEXT:    vdup.16 q1, r0
559; CHECK-BE-NEXT:    vrev64.16 q0, q1
560; CHECK-BE-NEXT:    bx lr
561  %vec.blockSize = bitcast i32 %blockSize to <2 x i16>
562  %.splat2 = shufflevector <2 x i16> %vec.blockSize, <2 x i16> poison, <8 x i32> zeroinitializer
563  ret <8 x i16> %.splat2
564}
565