xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-shifts.ll (revision d7853bae941006cece63013f09d524e72bbbec45)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s
3
; Vector-by-vector left shift on <16 x i8>: every lane of %src1 is shifted by
; the matching lane of %src2. Expected to select a single vshl.u8.
define arm_aapcs_vfpcc <16 x i8> @shl_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shl_qq_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <16 x i8> %src1, %src2
  ret <16 x i8> %shl
}
13
; Vector-by-vector left shift on <8 x i16>: every lane of %src1 is shifted by
; the matching lane of %src2. Expected to select a single vshl.u16.
define arm_aapcs_vfpcc <8 x i16> @shl_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: shl_qq_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <8 x i16> %src1, %src2
  ret <8 x i16> %shl
}
23
; Vector-by-vector left shift on <4 x i32>: every lane of %src1 is shifted by
; the matching lane of %src2. Expected to select a single vshl.u32.
define arm_aapcs_vfpcc <4 x i32> @shl_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: shl_qq_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %src1, %src2
  ret <4 x i32> %shl
}
33
; Vector-by-vector left shift on <2 x i64>. The expected output contains no
; vector shift: each i64 lane is moved into a GPR pair, shifted with lsll by
; the low word of the matching %src2 lane (s4 / s6), and q0 is rebuilt.
define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shl_qq_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r5, lr}
; CHECK-NEXT:    push {r5, lr}
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r2, r1, d1
; CHECK-NEXT:    lsll r2, r1, r0
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov r0, r5, d0
; CHECK-NEXT:    lsll r0, r5, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
; CHECK-NEXT:    pop {r5, pc}
entry:
  %shl = shl <2 x i64> %src1, %src2
  ret <2 x i64> %shl
}
52
53
; Vector-by-vector logical right shift on <16 x i8>. The expected output
; negates the shift amounts (vneg.s8) and then uses the unsigned vshl.u8.
define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shru_qq_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <16 x i8> %src1, %src2
  ret <16 x i8> %lshr
}
64
; Vector-by-vector logical right shift on <8 x i16>. The expected output
; negates the shift amounts (vneg.s16) and then uses the unsigned vshl.u16.
define arm_aapcs_vfpcc <8 x i16> @shru_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: shru_qq_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <8 x i16> %src1, %src2
  ret <8 x i16> %lshr
}
75
; Vector-by-vector logical right shift on <4 x i32>. The expected output
; negates the shift amounts (vneg.s32) and then uses the unsigned vshl.u32.
define arm_aapcs_vfpcc <4 x i32> @shru_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: shru_qq_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <4 x i32> %src1, %src2
  ret <4 x i32> %lshr
}
86
; Vector-by-vector logical right shift on <2 x i64>. The expected output is
; scalarised: each shift amount is negated with rsbs and applied with lsll to
; the lane held in a GPR pair, then q0 is rebuilt from the two results.
define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shru_qq_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r5, lr}
; CHECK-NEXT:    push {r5, lr}
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmov r0, r5, d1
; CHECK-NEXT:    rsbs r2, r2, #0
; CHECK-NEXT:    lsll r0, r5, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    rsbs r1, r1, #0
; CHECK-NEXT:    lsll r2, r3, r1
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r5
; CHECK-NEXT:    pop {r5, pc}
entry:
  %lshr = lshr <2 x i64> %src1, %src2
  ret <2 x i64> %lshr
}
107
108
; Vector-by-vector arithmetic right shift on <16 x i8>. The expected output
; negates the shift amounts (vneg.s8) and then uses the signed vshl.s8.
define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shrs_qq_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vshl.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <16 x i8> %src1, %src2
  ret <16 x i8> %ashr
}
119
; Vector-by-vector arithmetic right shift on <8 x i16>. The expected output
; negates the shift amounts (vneg.s16) and then uses the signed vshl.s16.
define arm_aapcs_vfpcc <8 x i16> @shrs_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: shrs_qq_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vshl.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <8 x i16> %src1, %src2
  ret <8 x i16> %ashr
}
130
; Vector-by-vector arithmetic right shift on <4 x i32>. The expected output
; negates the shift amounts (vneg.s32) and then uses the signed vshl.s32.
define arm_aapcs_vfpcc <4 x i32> @shrs_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: shrs_qq_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vshl.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <4 x i32> %src1, %src2
  ret <4 x i32> %ashr
}
141
; Vector-by-vector arithmetic right shift on <2 x i64>. The expected output is
; scalarised: each lane is moved into a GPR pair, shifted with asrl by the low
; word of the matching %src2 lane (s4 / s6), and q0 is rebuilt.
define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shrs_qq_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r5, lr}
; CHECK-NEXT:    push {r5, lr}
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r2, r1, d1
; CHECK-NEXT:    asrl r2, r1, r0
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov r0, r5, d0
; CHECK-NEXT:    asrl r0, r5, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
; CHECK-NEXT:    pop {r5, pc}
entry:
  %ashr = ashr <2 x i64> %src1, %src2
  ret <2 x i64> %ashr
}
160
161
; Left shift of <16 x i8> by the uniform constant 4. Expected to select the
; immediate form vshl.i8 #4.
define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qi_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %shl
}
171
; Left shift of <8 x i16> by the uniform constant 4. Expected to select the
; immediate form vshl.i16 #4.
define arm_aapcs_vfpcc <8 x i16> @shl_qi_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shl_qi_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i16 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <8 x i16> %shl
}
181
; Left shift of <4 x i32> by the uniform constant 4. Expected to select the
; immediate form vshl.i32 #4.
define arm_aapcs_vfpcc <4 x i32> @shl_qi_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shl_qi_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i32 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl
}
191
; Left shift of <2 x i64> by the uniform constant 4. The expected output is
; scalarised: both lanes go through GPR pairs and are shifted with lsll #4.
define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
; CHECK-LABEL: shl_qi_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    lsll r0, r1, #4
; CHECK-NEXT:    lsll r2, r3, #4
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <2 x i64> %src1, <i64 4, i64 4>
  ret <2 x i64> %shl
}
206
207
; Logical right shift of <16 x i8> by the uniform constant 4. Expected to
; select the immediate form vshr.u8 #4.
define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shru_qi_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.u8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %lshr
}
217
; Logical right shift of <8 x i16> by the uniform constant 4. Expected to
; select the immediate form vshr.u16 #4.
define arm_aapcs_vfpcc <8 x i16> @shru_qi_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shru_qi_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.u16 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <8 x i16> %lshr
}
227
; Logical right shift of <4 x i32> by the uniform constant 4. Expected to
; select the immediate form vshr.u32 #4.
define arm_aapcs_vfpcc <4 x i32> @shru_qi_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shru_qi_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.u32 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %lshr
}
237
; Logical right shift of <2 x i64> by the uniform constant 4. The expected
; output is scalarised: both lanes go through GPR pairs and are shifted with
; lsrl #4.
define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
; CHECK-LABEL: shru_qi_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    lsrl r0, r1, #4
; CHECK-NEXT:    lsrl r2, r3, #4
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %lshr = lshr <2 x i64> %src1, <i64 4, i64 4>
  ret <2 x i64> %lshr
}
252
253
; Arithmetic right shift of <16 x i8> by the uniform constant 4. Expected to
; select the immediate form vshr.s8 #4.
define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shrs_qi_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.s8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %ashr
}
263
; Arithmetic right shift of <8 x i16> by the uniform constant 4. Expected to
; select the immediate form vshr.s16 #4.
define arm_aapcs_vfpcc <8 x i16> @shrs_qi_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shrs_qi_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.s16 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <8 x i16> %ashr
}
273
; Arithmetic right shift of <4 x i32> by the uniform constant 4. Expected to
; select the immediate form vshr.s32 #4.
define arm_aapcs_vfpcc <4 x i32> @shrs_qi_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shrs_qi_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.s32 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %ashr
}
283
; Arithmetic right shift of <2 x i64> by the uniform constant 4. The expected
; output is scalarised: both lanes go through GPR pairs and are shifted with
; asrl #4.
define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
; CHECK-LABEL: shrs_qi_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    asrl r0, r1, #4
; CHECK-NEXT:    asrl r2, r3, #4
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %ashr = ashr <2 x i64> %src1, <i64 4, i64 4>
  ret <2 x i64> %ashr
}
298
299
; Left shift of <16 x i8> by a scalar: %src2 is splatted to every lane and
; used as the shift amount. Expected to select the vector-by-register form
; vshl.u8 q0, r0 with no explicit splat.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shl_qr_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %shl = shl <16 x i8> %src1, %splat
  ret <16 x i8> %shl
}
311
; Left shift of <8 x i16> by a scalar: %src2 is splatted to every lane and
; used as the shift amount. Expected to select the vector-by-register form
; vshl.u16 q0, r0 with no explicit splat.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <8 x i16> @shl_qr_int16_t(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: shl_qr_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %shl = shl <8 x i16> %src1, %splat
  ret <8 x i16> %shl
}
323
; Left shift of <4 x i32> by a scalar: %src2 is splatted to every lane and
; used as the shift amount. Expected to select the vector-by-register form
; vshl.u32 q0, r0 with no explicit splat.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <4 x i32> @shl_qr_int32_t(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: shl_qr_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %shl = shl <4 x i32> %src1, %splat
  ret <4 x i32> %shl
}
335
; Left shift of <2 x i64> by a splatted i64 scalar. The expected output is
; scalarised: both lanes go through GPR pairs and are shifted with lsll using
; r0 (the low word of %src2) as the amount.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
; CHECK-LABEL: shl_qr_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r12, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    lsll r12, r1, r0
; CHECK-NEXT:    lsll r2, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r12
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <2 x i64> poison, i64 %src2, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  %shl = shl <2 x i64> %src1, %splat
  ret <2 x i64> %shl
}
352
353
; Logical right shift of <16 x i8> by a splatted scalar. The expected output
; negates the scalar (rsbs) and then uses the vector-by-register vshl.u8.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shru_qr_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    vshl.u8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %lshr = lshr <16 x i8> %src1, %splat
  ret <16 x i8> %lshr
}
366
; Logical right shift of <8 x i16> by a splatted scalar. The expected output
; negates the scalar (rsbs) and then uses the vector-by-register vshl.u16.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <8 x i16> @shru_qr_int16_t(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: shru_qr_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    vshl.u16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %lshr = lshr <8 x i16> %src1, %splat
  ret <8 x i16> %lshr
}
379
; Logical right shift of <4 x i32> by a splatted scalar. The expected output
; negates the scalar (rsbs) and then uses the vector-by-register vshl.u32.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <4 x i32> @shru_qr_int32_t(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: shru_qr_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    vshl.u32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %lshr = lshr <4 x i32> %src1, %splat
  ret <4 x i32> %lshr
}
392
; Logical right shift of <2 x i64> by a splatted i64 scalar. The expected
; output is scalarised: r0 (the low word of %src2) is negated once with rsbs
; and both lanes, held in GPR pairs, are shifted with lsll by that value.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
; CHECK-LABEL: shru_qr_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r5, lr}
; CHECK-NEXT:    push {r5, lr}
; CHECK-NEXT:    rsbs r3, r0, #0
; CHECK-NEXT:    vmov r2, r1, d1
; CHECK-NEXT:    vmov r0, r5, d0
; CHECK-NEXT:    lsll r2, r1, r3
; CHECK-NEXT:    lsll r0, r5, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
; CHECK-NEXT:    pop {r5, pc}
entry:
  %ins = insertelement <2 x i64> poison, i64 %src2, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  %lshr = lshr <2 x i64> %src1, %splat
  ret <2 x i64> %lshr
}
412
413
; Arithmetic right shift of <16 x i8> by a splatted scalar. The expected
; output negates the scalar (rsbs) and then uses the signed vector-by-register
; vshl.s8.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shrs_qr_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    vshl.s8 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %ashr = ashr <16 x i8> %src1, %splat
  ret <16 x i8> %ashr
}
426
; Arithmetic right shift of <8 x i16> by a splatted scalar. The expected
; output negates the scalar (rsbs) and then uses the signed vector-by-register
; vshl.s16.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <8 x i16> @shrs_qr_int16_t(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: shrs_qr_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    vshl.s16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %ashr = ashr <8 x i16> %src1, %splat
  ret <8 x i16> %ashr
}
439
; Arithmetic right shift of <4 x i32> by a splatted scalar. The expected
; output negates the scalar (rsbs) and then uses the signed vector-by-register
; vshl.s32.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <4 x i32> @shrs_qr_int32_t(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: shrs_qr_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    vshl.s32 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %ashr = ashr <4 x i32> %src1, %splat
  ret <4 x i32> %ashr
}
452
; Arithmetic right shift of <2 x i64> by a splatted i64 scalar. The expected
; output is scalarised: both lanes go through GPR pairs and are shifted with
; asrl using r0 (the low word of %src2) as the amount.
; The splat uses poison placeholders rather than the deprecated undef.
define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
; CHECK-LABEL: shrs_qr_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r12, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    asrl r12, r1, r0
; CHECK-NEXT:    asrl r2, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r12
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %ins = insertelement <2 x i64> poison, i64 %src2, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  %ashr = ashr <2 x i64> %src1, %splat
  ret <2 x i64> %ashr
}
469
; Left shift of <16 x i8> by a non-uniform constant vector that repeats the
; amounts 1, 2, 3, 4. In the expected output the movw/movt pair builds
; 0x04030201 (513 = 0x0201, 1027 = 0x0403), vdup.32 replicates that word
; across q1, and a vector-by-vector vshl.u8 applies it.
define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qiv_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r0, #513
; CHECK-NEXT:    movt r0, #1027
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vshl.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <16 x i8> %src1, <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>
  ret <16 x i8> %shl
}
482
; Left shift of <8 x i16> by a non-uniform constant vector that repeats the
; amounts 1, 2, 3, 4. In the expected output the amounts are loaded from a
; 16-byte-aligned constant pool entry and applied with a vector-by-vector
; vshl.u16.
define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shl_qiv_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r0, .LCPI37_0
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vshl.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI37_0:
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 2 @ 0x2
; CHECK-NEXT:    .short 3 @ 0x3
; CHECK-NEXT:    .short 4 @ 0x4
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 2 @ 0x2
; CHECK-NEXT:    .short 3 @ 0x3
; CHECK-NEXT:    .short 4 @ 0x4
entry:
  %shl = shl <8 x i16> %src1, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
  ret <8 x i16> %shl
}
505
; Left shift of <4 x i32> by the non-uniform constant vector <1, 2, 3, 4>.
; In the expected output the amounts are loaded from a 16-byte-aligned
; constant pool entry and applied with a vector-by-vector vshl.u32.
define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shl_qiv_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r0, .LCPI38_0
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vshl.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI38_0:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 3 @ 0x3
; CHECK-NEXT:    .long 4 @ 0x4
entry:
  %shl = shl <4 x i32> %src1, <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %shl
}
524