xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll (revision 28233408a2c8670d7d94ae1bf18a2bb5f7194c32)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
3
; Multiplies the sign-extended lanes 0 and 2 of %src1 by a splat of %src2.
; The splat is built in the <4 x i32> domain (insertelement into lane 0 plus an
; all-zero shuffle mask) and sign-extended afterwards; the shuffled %src1 value
; is the first operand of the mul.
; Expected output: r0 is written to lanes 0 and 2 of q2, then one vmullb.s32
; produces the result, which is copied into q0.
4define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
5; CHECK-LABEL: sext32_0246_0ext:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
8; CHECK-NEXT:    vmullb.s32 q1, q0, q2
9; CHECK-NEXT:    vmov q0, q1
10; CHECK-NEXT:    bx lr
11entry:
12  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
13  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
14  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
15  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
16  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
17  %out = mul <2 x i64> %out1, %out2
18  ret <2 x i64> %out
19}
20
; Multiplies a splat of %src2 by the sign-extended lanes 0 and 2 of %src1.
; The splat is built in the <4 x i32> domain and sign-extended afterwards; here
; the splat is the first operand of the mul and the shuffled %src1 the second.
; Expected output: r0 is written to lanes 0 and 2 of q2, then one vmullb.s32
; with the splat register as its first source, and a copy of the result to q0.
21define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
22; CHECK-LABEL: sext32_0ext_0246:
23; CHECK:       @ %bb.0: @ %entry
24; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
25; CHECK-NEXT:    vmullb.s32 q1, q2, q0
26; CHECK-NEXT:    vmov q0, q1
27; CHECK-NEXT:    bx lr
28entry:
29  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
30  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
31  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
32  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
33  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
34  %out = mul <2 x i64> %out2, %out1
35  ret <2 x i64> %out
36}
37
; Multiplies the sign-extended lanes 0 and 2 of %src1 by a splat of %src2, where
; %src2 is sign-extended to i64 as a scalar first and only then splatted in the
; <2 x i64> domain. The shuffled %src1 value is the first operand of the mul.
; Expected output: no vmullb is used. Lanes s2 and s0 are moved to core
; registers, multiplied with a umull/asrs/mla sequence, and the two 64-bit
; results are reassembled into q0 with two-lane vmov instructions. r4, r5, r7
; and lr are pushed on entry and restored (lr into pc) on exit.
38define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
39; CHECK-LABEL: sext32_0246_ext0:
40; CHECK:       @ %bb.0: @ %entry
41; CHECK-NEXT:    .save {r4, r5, r7, lr}
42; CHECK-NEXT:    push {r4, r5, r7, lr}
43; CHECK-NEXT:    vmov r1, s2
44; CHECK-NEXT:    vmov r3, s0
45; CHECK-NEXT:    umull lr, r12, r1, r0
46; CHECK-NEXT:    umull r2, r5, r3, r0
47; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
48; CHECK-NEXT:    asrs r2, r0, #31
49; CHECK-NEXT:    mla r4, r1, r2, r12
50; CHECK-NEXT:    asrs r1, r1, #31
51; CHECK-NEXT:    mla r2, r3, r2, r5
52; CHECK-NEXT:    asrs r3, r3, #31
53; CHECK-NEXT:    mla r1, r1, r0, r4
54; CHECK-NEXT:    mla r0, r3, r0, r2
55; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
56; CHECK-NEXT:    pop {r4, r5, r7, pc}
57entry:
58  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
59  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
60  %ext = sext i32 %src2 to i64
61  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
62  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
63  %out = mul <2 x i64> %out1, %shuf2
64  ret <2 x i64> %out
65}
66
; Multiplies a splat of %src2 by the sign-extended lanes 0 and 2 of %src1, where
; %src2 is sign-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The splat is the first operand of the mul.
; Expected output: no vmullb is used. Lanes s2 and s0 are moved to core
; registers, multiplied with a umull/asrs/mla sequence, and the results are
; reassembled into q0 with two-lane vmov instructions. r4, r5, r7 and lr are
; pushed on entry and restored (lr into pc) on exit.
67define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
68; CHECK-LABEL: sext32_ext0_0246:
69; CHECK:       @ %bb.0: @ %entry
70; CHECK-NEXT:    .save {r4, r5, r7, lr}
71; CHECK-NEXT:    push {r4, r5, r7, lr}
72; CHECK-NEXT:    vmov r1, s2
73; CHECK-NEXT:    asrs r4, r0, #31
74; CHECK-NEXT:    vmov r3, s0
75; CHECK-NEXT:    umull lr, r12, r0, r1
76; CHECK-NEXT:    umull r2, r5, r0, r3
77; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
78; CHECK-NEXT:    asrs r2, r1, #31
79; CHECK-NEXT:    mla r2, r0, r2, r12
80; CHECK-NEXT:    mla r1, r4, r1, r2
81; CHECK-NEXT:    asrs r2, r3, #31
82; CHECK-NEXT:    mla r0, r0, r2, r5
83; CHECK-NEXT:    mla r0, r4, r3, r0
84; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
85; CHECK-NEXT:    pop {r4, r5, r7, pc}
86entry:
87  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
88  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
89  %ext = sext i32 %src2 to i64
90  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
91  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
92  %out = mul <2 x i64> %shuf2, %out1
93  ret <2 x i64> %out
94}
95
; Multiplies the sign-extended lanes 1 and 3 of %src1 by a splat of %src2.
; The splat is built in the <4 x i32> domain and sign-extended afterwards; the
; shuffled %src1 value is the first operand of the mul.
; Expected output: r0 is written to lanes 0 and 2 of q1, %src1 is rearranged
; with vrev64.32, and a single vmullb.s32 writes the result directly to q0.
96define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
97; CHECK-LABEL: sext32_1357_0ext:
98; CHECK:       @ %bb.0: @ %entry
99; CHECK-NEXT:    vmov q1[2], q1[0], r0, r0
100; CHECK-NEXT:    vrev64.32 q2, q0
101; CHECK-NEXT:    vmullb.s32 q0, q2, q1
102; CHECK-NEXT:    bx lr
103entry:
104  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
105  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
106  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
107  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
108  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
109  %out = mul <2 x i64> %out1, %out2
110  ret <2 x i64> %out
111}
112
; Multiplies a splat of %src2 by the sign-extended lanes 1 and 3 of %src1.
; The splat is built in the <4 x i32> domain and sign-extended afterwards; the
; splat is the first operand of the mul.
; Expected output: %src1 is rearranged with vrev64.32, r0 is written to lanes 0
; and 2 of q2, and a single vmullb.s32 (splat register first) writes q0.
113define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
114; CHECK-LABEL: sext32_0ext_1357:
115; CHECK:       @ %bb.0: @ %entry
116; CHECK-NEXT:    vrev64.32 q1, q0
117; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
118; CHECK-NEXT:    vmullb.s32 q0, q2, q1
119; CHECK-NEXT:    bx lr
120entry:
121  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
122  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
123  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
124  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
125  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
126  %out = mul <2 x i64> %out2, %out1
127  ret <2 x i64> %out
128}
129
; Multiplies the sign-extended lanes 1 and 3 of %src1 by a splat of %src2, where
; %src2 is sign-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The shuffled %src1 value is the first operand of the mul.
; Expected output: no vmullb is used. After a vrev64.32, lanes s6 and s4 are
; moved to core registers and multiplied with a umull/asrs/mla sequence; the
; results are reassembled into q0 with two-lane vmov instructions. r4, r5, r7
; and lr are pushed on entry and restored (lr into pc) on exit.
130define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
131; CHECK-LABEL: sext32_1357_ext0:
132; CHECK:       @ %bb.0: @ %entry
133; CHECK-NEXT:    .save {r4, r5, r7, lr}
134; CHECK-NEXT:    push {r4, r5, r7, lr}
135; CHECK-NEXT:    vrev64.32 q1, q0
136; CHECK-NEXT:    vmov r1, s6
137; CHECK-NEXT:    vmov r3, s4
138; CHECK-NEXT:    umull lr, r12, r1, r0
139; CHECK-NEXT:    umull r2, r5, r3, r0
140; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
141; CHECK-NEXT:    asrs r2, r0, #31
142; CHECK-NEXT:    mla r4, r1, r2, r12
143; CHECK-NEXT:    asrs r1, r1, #31
144; CHECK-NEXT:    mla r2, r3, r2, r5
145; CHECK-NEXT:    asrs r3, r3, #31
146; CHECK-NEXT:    mla r1, r1, r0, r4
147; CHECK-NEXT:    mla r0, r3, r0, r2
148; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
149; CHECK-NEXT:    pop {r4, r5, r7, pc}
150entry:
151  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
152  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
153  %ext = sext i32 %src2 to i64
154  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
155  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
156  %out = mul <2 x i64> %out1, %shuf2
157  ret <2 x i64> %out
158}
159
; Multiplies a splat of %src2 by the sign-extended lanes 1 and 3 of %src1, where
; %src2 is sign-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The splat is the first operand of the mul.
; Expected output: no vmullb is used. After a vrev64.32, lanes s6 and s4 are
; moved to core registers and multiplied with a umull/asrs/mla sequence; the
; results are reassembled into q0 with two-lane vmov instructions. r4, r5, r7
; and lr are pushed on entry and restored (lr into pc) on exit.
160define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
161; CHECK-LABEL: sext32_ext0_1357:
162; CHECK:       @ %bb.0: @ %entry
163; CHECK-NEXT:    .save {r4, r5, r7, lr}
164; CHECK-NEXT:    push {r4, r5, r7, lr}
165; CHECK-NEXT:    vrev64.32 q1, q0
166; CHECK-NEXT:    asrs r4, r0, #31
167; CHECK-NEXT:    vmov r1, s6
168; CHECK-NEXT:    vmov r3, s4
169; CHECK-NEXT:    umull lr, r12, r0, r1
170; CHECK-NEXT:    umull r2, r5, r0, r3
171; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
172; CHECK-NEXT:    asrs r2, r1, #31
173; CHECK-NEXT:    mla r2, r0, r2, r12
174; CHECK-NEXT:    mla r1, r4, r1, r2
175; CHECK-NEXT:    asrs r2, r3, #31
176; CHECK-NEXT:    mla r0, r0, r2, r5
177; CHECK-NEXT:    mla r0, r4, r3, r0
178; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
179; CHECK-NEXT:    pop {r4, r5, r7, pc}
180entry:
181  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
182  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
183  %ext = sext i32 %src2 to i64
184  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
185  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
186  %out = mul <2 x i64> %shuf2, %out1
187  ret <2 x i64> %out
188}
189
; Wide variant: selects lanes 0, 2, 1, 3 (in that order) from an <8 x i32>
; %src1, sign-extends them to <4 x i64> and multiplies by a splat of %src2.
; The splat is built in the <8 x i32> domain and sign-extended afterwards; the
; shuffled %src1 value is the first operand of the mul.
; Expected output: two vmullb.s32 instructions sharing one splat register (q3),
; with vmov.f32 lane moves arranging the sources. q4 is used as a temporary, so
; d8/d9 are saved with vpush and restored with vpop.
190define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
191; CHECK-LABEL: sext32_0213_0ext:
192; CHECK:       @ %bb.0: @ %entry
193; CHECK-NEXT:    .vsave {d8, d9}
194; CHECK-NEXT:    vpush {d8, d9}
195; CHECK-NEXT:    vmov q4, q0
196; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
197; CHECK-NEXT:    vmov.f32 s17, s4
198; CHECK-NEXT:    vmov.f32 s0, s1
199; CHECK-NEXT:    vmullb.s32 q2, q4, q3
200; CHECK-NEXT:    vmov.f32 s2, s3
201; CHECK-NEXT:    vmullb.s32 q1, q0, q3
202; CHECK-NEXT:    vmov q0, q2
203; CHECK-NEXT:    vpop {d8, d9}
204; CHECK-NEXT:    bx lr
205entry:
206  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
207  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
208  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
209  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
210  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
211  %out = mul <4 x i64> %out1, %out2
212  ret <4 x i64> %out
213}
214
; Wide variant: multiplies a splat of %src2 by lanes 0, 2, 1, 3 (in that order)
; of an <8 x i32> %src1, both sign-extended to <4 x i64>. The splat is built in
; the <8 x i32> domain and sign-extended afterwards; the splat is the first
; operand of the mul.
; Expected output: two vmullb.s32 instructions with the splat register (q3) as
; the first source, with vmov.f32 lane moves arranging the other source. q4 is
; used as a temporary, so d8/d9 are saved with vpush and restored with vpop.
215define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
216; CHECK-LABEL: sext32_0ext_0213:
217; CHECK:       @ %bb.0: @ %entry
218; CHECK-NEXT:    .vsave {d8, d9}
219; CHECK-NEXT:    vpush {d8, d9}
220; CHECK-NEXT:    vmov q4, q0
221; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
222; CHECK-NEXT:    vmov.f32 s17, s4
223; CHECK-NEXT:    vmov.f32 s0, s1
224; CHECK-NEXT:    vmullb.s32 q2, q3, q4
225; CHECK-NEXT:    vmov.f32 s2, s3
226; CHECK-NEXT:    vmullb.s32 q1, q3, q0
227; CHECK-NEXT:    vmov q0, q2
228; CHECK-NEXT:    vpop {d8, d9}
229; CHECK-NEXT:    bx lr
230entry:
231  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
232  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
233  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
234  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
235  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
236  %out = mul <4 x i64> %out2, %out1
237  ret <4 x i64> %out
238}
239
; Wide variant: lanes 0, 2, 1, 3 (in that order) of an <8 x i32> %src1 are
; sign-extended to <4 x i64> and multiplied by a splat of %src2, where %src2 is
; sign-extended to i64 as a scalar first and then splatted in the <4 x i64>
; domain. The shuffled %src1 value is the first operand of the mul.
; Expected output: no vmullb is used. Each of the four lanes is moved to a core
; register and multiplied with a umull/asrs/mla sequence; the results are
; reassembled into q1 and q0 with two-lane vmov instructions. r4, r5, r7 and lr
; are pushed on entry and restored (lr into pc) on exit.
240define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
241; CHECK-LABEL: sext32_0213_ext0:
242; CHECK:       @ %bb.0: @ %entry
243; CHECK-NEXT:    .save {r4, r5, r7, lr}
244; CHECK-NEXT:    push {r4, r5, r7, lr}
245; CHECK-NEXT:    vmov.f32 s4, s1
246; CHECK-NEXT:    vmov.f32 s6, s3
247; CHECK-NEXT:    vmov r3, s4
248; CHECK-NEXT:    vmov r1, s6
249; CHECK-NEXT:    umull r2, r5, r3, r0
250; CHECK-NEXT:    umull lr, r12, r1, r0
251; CHECK-NEXT:    vmov q1[2], q1[0], r2, lr
252; CHECK-NEXT:    asrs r2, r0, #31
253; CHECK-NEXT:    mla r4, r1, r2, r12
254; CHECK-NEXT:    asrs r1, r1, #31
255; CHECK-NEXT:    mla r5, r3, r2, r5
256; CHECK-NEXT:    asrs r3, r3, #31
257; CHECK-NEXT:    mla r1, r1, r0, r4
258; CHECK-NEXT:    mla r3, r3, r0, r5
259; CHECK-NEXT:    vmov q1[3], q1[1], r3, r1
260; CHECK-NEXT:    vmov r1, s2
261; CHECK-NEXT:    umull r3, r5, r1, r0
262; CHECK-NEXT:    mla r5, r1, r2, r5
263; CHECK-NEXT:    asrs r1, r1, #31
264; CHECK-NEXT:    mla r12, r1, r0, r5
265; CHECK-NEXT:    vmov r5, s0
266; CHECK-NEXT:    umull r4, r1, r5, r0
267; CHECK-NEXT:    mla r1, r5, r2, r1
268; CHECK-NEXT:    asrs r2, r5, #31
269; CHECK-NEXT:    vmov q0[2], q0[0], r4, r3
270; CHECK-NEXT:    mla r0, r2, r0, r1
271; CHECK-NEXT:    vmov q0[3], q0[1], r0, r12
272; CHECK-NEXT:    pop {r4, r5, r7, pc}
273entry:
274  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
275  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
276  %ext = sext i32 %src2 to i64
277  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
278  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
279  %out = mul <4 x i64> %out1, %shuf2
280  ret <4 x i64> %out
281}
282
; Wide variant: a splat of %src2 is multiplied by lanes 0, 2, 1, 3 (in that
; order) of an <8 x i32> %src1 sign-extended to <4 x i64>. %src2 is
; sign-extended to i64 as a scalar first and then splatted in the <4 x i64>
; domain; the splat is the first operand of the mul.
; Expected output: no vmullb is used. Each of the four lanes is moved to a core
; register and multiplied with a umull/asrs/mla sequence; the results are
; reassembled into q1 and q0 with two-lane vmov instructions. r4, r5, r7 and lr
; are pushed on entry and restored (lr into pc) on exit.
283define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
284; CHECK-LABEL: sext32_ext0_0213:
285; CHECK:       @ %bb.0: @ %entry
286; CHECK-NEXT:    .save {r4, r5, r7, lr}
287; CHECK-NEXT:    push {r4, r5, r7, lr}
288; CHECK-NEXT:    vmov.f32 s4, s1
289; CHECK-NEXT:    asrs r4, r0, #31
290; CHECK-NEXT:    vmov.f32 s6, s3
291; CHECK-NEXT:    vmov r3, s4
292; CHECK-NEXT:    vmov r1, s6
293; CHECK-NEXT:    umull r2, r5, r0, r3
294; CHECK-NEXT:    umull lr, r12, r0, r1
295; CHECK-NEXT:    vmov q1[2], q1[0], r2, lr
296; CHECK-NEXT:    asrs r2, r1, #31
297; CHECK-NEXT:    mla r2, r0, r2, r12
298; CHECK-NEXT:    mla r1, r4, r1, r2
299; CHECK-NEXT:    asrs r2, r3, #31
300; CHECK-NEXT:    mla r2, r0, r2, r5
301; CHECK-NEXT:    mla r2, r4, r3, r2
302; CHECK-NEXT:    vmov q1[3], q1[1], r2, r1
303; CHECK-NEXT:    vmov r1, s2
304; CHECK-NEXT:    umull r2, r3, r0, r1
305; CHECK-NEXT:    asrs r5, r1, #31
306; CHECK-NEXT:    mla r3, r0, r5, r3
307; CHECK-NEXT:    mla r12, r4, r1, r3
308; CHECK-NEXT:    vmov r3, s0
309; CHECK-NEXT:    umull r5, r1, r0, r3
310; CHECK-NEXT:    vmov q0[2], q0[0], r5, r2
311; CHECK-NEXT:    asrs r2, r3, #31
312; CHECK-NEXT:    mla r0, r0, r2, r1
313; CHECK-NEXT:    mla r0, r4, r3, r0
314; CHECK-NEXT:    vmov q0[3], q0[1], r0, r12
315; CHECK-NEXT:    pop {r4, r5, r7, pc}
316entry:
317  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
318  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
319  %ext = sext i32 %src2 to i64
320  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
321  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
322  %out = mul <4 x i64> %shuf2, %out1
323  ret <4 x i64> %out
324}
325
; Multiplies the zero-extended lanes 0 and 2 of %src1 by a splat of %src2.
; The splat is built in the <4 x i32> domain (insertelement into lane 0 plus an
; all-zero shuffle mask) and zero-extended afterwards; the shuffled %src1 value
; is the first operand of the mul.
; Expected output: r0 is written to lanes 0 and 2 of q2, then one vmullb.u32
; produces the result, which is copied into q0.
326define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
327; CHECK-LABEL: zext32_0246_0ext:
328; CHECK:       @ %bb.0: @ %entry
329; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
330; CHECK-NEXT:    vmullb.u32 q1, q0, q2
331; CHECK-NEXT:    vmov q0, q1
332; CHECK-NEXT:    bx lr
333entry:
334  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
335  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
336  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
337  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
338  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
339  %out = mul <2 x i64> %out1, %out2
340  ret <2 x i64> %out
341}
342
; Multiplies a splat of %src2 by the zero-extended lanes 0 and 2 of %src1.
; The splat is built in the <4 x i32> domain and zero-extended afterwards; the
; splat is the first operand of the mul and the shuffled %src1 the second.
; Expected output: r0 is written to lanes 0 and 2 of q2, then one vmullb.u32
; with the splat register as its first source, and a copy of the result to q0.
343define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
344; CHECK-LABEL: zext32_0ext_0246:
345; CHECK:       @ %bb.0: @ %entry
346; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
347; CHECK-NEXT:    vmullb.u32 q1, q2, q0
348; CHECK-NEXT:    vmov q0, q1
349; CHECK-NEXT:    bx lr
350entry:
351  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
352  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
353  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
354  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
355  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
356  %out = mul <2 x i64> %out2, %out1
357  ret <2 x i64> %out
358}
359
; Multiplies the zero-extended lanes 0 and 2 of %src1 by a splat of %src2, where
; %src2 is zero-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The shuffled %src1 value is the first operand of the mul.
; Expected output: no vmullb is used. Lanes s2 and s0 are moved to core
; registers, each multiplied by r0 with a single umull, and the low/high halves
; are reassembled into q0 with two-lane vmov instructions.
360define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
361; CHECK-LABEL: zext32_0246_ext0:
362; CHECK:       @ %bb.0: @ %entry
363; CHECK-NEXT:    vmov r1, s2
364; CHECK-NEXT:    vmov r3, s0
365; CHECK-NEXT:    umull r1, r2, r1, r0
366; CHECK-NEXT:    umull r0, r3, r3, r0
367; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
368; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
369; CHECK-NEXT:    bx lr
370entry:
371  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
372  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
373  %ext = zext i32 %src2 to i64
374  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
375  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
376  %out = mul <2 x i64> %out1, %shuf2
377  ret <2 x i64> %out
378}
379
; Multiplies a splat of %src2 by the zero-extended lanes 0 and 2 of %src1, where
; %src2 is zero-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The splat is the first operand of the mul.
; Expected output: no vmullb is used. Lanes s2 and s0 are moved to core
; registers, each multiplied by r0 with a single umull (r0 as first source), and
; the low/high halves are reassembled into q0 with two-lane vmov instructions.
380define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
381; CHECK-LABEL: zext32_ext0_0246:
382; CHECK:       @ %bb.0: @ %entry
383; CHECK-NEXT:    vmov r1, s2
384; CHECK-NEXT:    vmov r3, s0
385; CHECK-NEXT:    umull r1, r2, r0, r1
386; CHECK-NEXT:    umull r0, r3, r0, r3
387; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
388; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
389; CHECK-NEXT:    bx lr
390entry:
391  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
392  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
393  %ext = zext i32 %src2 to i64
394  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
395  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
396  %out = mul <2 x i64> %shuf2, %out1
397  ret <2 x i64> %out
398}
399
; Multiplies the zero-extended lanes 1 and 3 of %src1 by a splat of %src2.
; The splat is built in the <4 x i32> domain and zero-extended afterwards; the
; shuffled %src1 value is the first operand of the mul.
; Expected output: r0 is written to lanes 0 and 2 of q1, %src1 is rearranged
; with vrev64.32, and a single vmullb.u32 writes the result directly to q0.
400define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
401; CHECK-LABEL: zext32_1357_0ext:
402; CHECK:       @ %bb.0: @ %entry
403; CHECK-NEXT:    vmov q1[2], q1[0], r0, r0
404; CHECK-NEXT:    vrev64.32 q2, q0
405; CHECK-NEXT:    vmullb.u32 q0, q2, q1
406; CHECK-NEXT:    bx lr
407entry:
408  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
409  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
410  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
411  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
412  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
413  %out = mul <2 x i64> %out1, %out2
414  ret <2 x i64> %out
415}
416
; Multiplies a splat of %src2 by the zero-extended lanes 1 and 3 of %src1.
; The splat is built in the <4 x i32> domain and zero-extended afterwards; the
; splat is the first operand of the mul.
; Expected output: %src1 is rearranged with vrev64.32, r0 is written to lanes 0
; and 2 of q2, and a single vmullb.u32 (splat register first) writes q0.
417define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
418; CHECK-LABEL: zext32_0ext_1357:
419; CHECK:       @ %bb.0: @ %entry
420; CHECK-NEXT:    vrev64.32 q1, q0
421; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
422; CHECK-NEXT:    vmullb.u32 q0, q2, q1
423; CHECK-NEXT:    bx lr
424entry:
425  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
426  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
427  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
428  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
429  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
430  %out = mul <2 x i64> %out2, %out1
431  ret <2 x i64> %out
432}
433
; Multiplies the zero-extended lanes 1 and 3 of %src1 by a splat of %src2, where
; %src2 is zero-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The shuffled %src1 value is the first operand of the mul.
; Expected output: no vmullb is used. After a vrev64.32, lanes s6 and s4 are
; moved to core registers, each multiplied by r0 with a single umull, and the
; low/high halves are reassembled into q0 with two-lane vmov instructions.
434define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
435; CHECK-LABEL: zext32_1357_ext0:
436; CHECK:       @ %bb.0: @ %entry
437; CHECK-NEXT:    vrev64.32 q1, q0
438; CHECK-NEXT:    vmov r1, s6
439; CHECK-NEXT:    vmov r3, s4
440; CHECK-NEXT:    umull r1, r2, r1, r0
441; CHECK-NEXT:    umull r0, r3, r3, r0
442; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
443; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
444; CHECK-NEXT:    bx lr
445entry:
446  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
447  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
448  %ext = zext i32 %src2 to i64
449  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
450  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
451  %out = mul <2 x i64> %out1, %shuf2
452  ret <2 x i64> %out
453}
454
; Multiplies a splat of %src2 by the zero-extended lanes 1 and 3 of %src1, where
; %src2 is zero-extended to i64 as a scalar first and then splatted in the
; <2 x i64> domain. The splat is the first operand of the mul.
; Expected output: no vmullb is used. After a vrev64.32, lanes s6 and s4 are
; moved to core registers, each multiplied by r0 with a single umull (r0 as
; first source), and the halves are reassembled into q0 with two-lane vmovs.
455define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
456; CHECK-LABEL: zext32_ext0_1357:
457; CHECK:       @ %bb.0: @ %entry
458; CHECK-NEXT:    vrev64.32 q1, q0
459; CHECK-NEXT:    vmov r1, s6
460; CHECK-NEXT:    vmov r3, s4
461; CHECK-NEXT:    umull r1, r2, r0, r1
462; CHECK-NEXT:    umull r0, r3, r0, r3
463; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
464; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
465; CHECK-NEXT:    bx lr
466entry:
467  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
468  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
469  %ext = zext i32 %src2 to i64
470  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
471  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
472  %out = mul <2 x i64> %shuf2, %out1
473  ret <2 x i64> %out
474}
475
; Wide variant: selects lanes 0, 2, 1, 3 (in that order) from an <8 x i32>
; %src1, zero-extends them to <4 x i64> and multiplies by a splat of %src2.
; The splat is built in the <8 x i32> domain and zero-extended afterwards; the
; shuffled %src1 value is the first operand of the mul.
; Expected output: two vmullb.u32 instructions sharing one splat register (q3),
; with vmov.f32 lane moves arranging the sources. q4 is used as a temporary, so
; d8/d9 are saved with vpush and restored with vpop.
476define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
477; CHECK-LABEL: zext32_0213_0ext:
478; CHECK:       @ %bb.0: @ %entry
479; CHECK-NEXT:    .vsave {d8, d9}
480; CHECK-NEXT:    vpush {d8, d9}
481; CHECK-NEXT:    vmov q4, q0
482; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
483; CHECK-NEXT:    vmov.f32 s17, s4
484; CHECK-NEXT:    vmov.f32 s0, s1
485; CHECK-NEXT:    vmullb.u32 q2, q4, q3
486; CHECK-NEXT:    vmov.f32 s2, s3
487; CHECK-NEXT:    vmullb.u32 q1, q0, q3
488; CHECK-NEXT:    vmov q0, q2
489; CHECK-NEXT:    vpop {d8, d9}
490; CHECK-NEXT:    bx lr
491entry:
492  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
493  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
494  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
495  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
496  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
497  %out = mul <4 x i64> %out1, %out2
498  ret <4 x i64> %out
499}
500
; Wide variant: multiplies a splat of %src2 by lanes 0, 2, 1, 3 (in that order)
; of an <8 x i32> %src1, both zero-extended to <4 x i64>. The splat is built in
; the <8 x i32> domain and zero-extended afterwards; the splat is the first
; operand of the mul.
; Expected output: two vmullb.u32 instructions with the splat register (q3) as
; the first source, with vmov.f32 lane moves arranging the other source. q4 is
; used as a temporary, so d8/d9 are saved with vpush and restored with vpop.
501define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
502; CHECK-LABEL: zext32_0ext_0213:
503; CHECK:       @ %bb.0: @ %entry
504; CHECK-NEXT:    .vsave {d8, d9}
505; CHECK-NEXT:    vpush {d8, d9}
506; CHECK-NEXT:    vmov q4, q0
507; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
508; CHECK-NEXT:    vmov.f32 s17, s4
509; CHECK-NEXT:    vmov.f32 s0, s1
510; CHECK-NEXT:    vmullb.u32 q2, q3, q4
511; CHECK-NEXT:    vmov.f32 s2, s3
512; CHECK-NEXT:    vmullb.u32 q1, q3, q0
513; CHECK-NEXT:    vmov q0, q2
514; CHECK-NEXT:    vpop {d8, d9}
515; CHECK-NEXT:    bx lr
516entry:
517  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
518  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
519  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
520  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
521  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
522  %out = mul <4 x i64> %out2, %out1
523  ret <4 x i64> %out
524}
525
; Wide variant: lanes 0, 2, 1, 3 (in that order) of an <8 x i32> %src1 are
; zero-extended to <4 x i64> and multiplied by a splat of %src2, where %src2 is
; zero-extended to i64 as a scalar first and then splatted in the <4 x i64>
; domain. The shuffled %src1 value is the first operand of the mul.
; Expected output: no vmullb is used. Each of the four lanes is moved to a core
; register and multiplied by r0 with a single umull; the halves are reassembled
; into q2 (then copied to q0) and q1 with two-lane vmov instructions.
526define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
527; CHECK-LABEL: zext32_0213_ext0:
528; CHECK:       @ %bb.0: @ %entry
529; CHECK-NEXT:    vmov r1, s2
530; CHECK-NEXT:    vmov r3, s0
531; CHECK-NEXT:    vmov.f32 s0, s1
532; CHECK-NEXT:    vmov.f32 s2, s3
533; CHECK-NEXT:    umull r1, r12, r1, r0
534; CHECK-NEXT:    umull r3, r2, r3, r0
535; CHECK-NEXT:    vmov q2[2], q2[0], r3, r1
536; CHECK-NEXT:    vmov r1, s2
537; CHECK-NEXT:    vmov r3, s0
538; CHECK-NEXT:    vmov q2[3], q2[1], r2, r12
539; CHECK-NEXT:    vmov q0, q2
540; CHECK-NEXT:    umull r1, r2, r1, r0
541; CHECK-NEXT:    umull r0, r3, r3, r0
542; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
543; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
544; CHECK-NEXT:    bx lr
545entry:
546  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
547  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
548  %ext = zext i32 %src2 to i64
549  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
550  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
551  %out = mul <4 x i64> %out1, %shuf2
552  ret <4 x i64> %out
553}
554
; Wide variant: a splat of %src2 is multiplied by lanes 0, 2, 1, 3 (in that
; order) of an <8 x i32> %src1 zero-extended to <4 x i64>. %src2 is
; zero-extended to i64 as a scalar first and then splatted in the <4 x i64>
; domain; the splat is the first operand of the mul.
; Expected output: no vmullb is used. Each of the four lanes is moved to a core
; register and multiplied by r0 with a single umull (r0 as first source); the
; halves are reassembled into q2 (then copied to q0) and q1 with two-lane vmovs.
555define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
556; CHECK-LABEL: zext32_ext0_0213:
557; CHECK:       @ %bb.0: @ %entry
558; CHECK-NEXT:    vmov r1, s2
559; CHECK-NEXT:    vmov r3, s0
560; CHECK-NEXT:    vmov.f32 s0, s1
561; CHECK-NEXT:    vmov.f32 s2, s3
562; CHECK-NEXT:    umull r1, r12, r0, r1
563; CHECK-NEXT:    umull r3, r2, r0, r3
564; CHECK-NEXT:    vmov q2[2], q2[0], r3, r1
565; CHECK-NEXT:    vmov r1, s2
566; CHECK-NEXT:    vmov r3, s0
567; CHECK-NEXT:    vmov q2[3], q2[1], r2, r12
568; CHECK-NEXT:    vmov q0, q2
569; CHECK-NEXT:    umull r1, r2, r0, r1
570; CHECK-NEXT:    umull r0, r3, r0, r3
571; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
572; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
573; CHECK-NEXT:    bx lr
574entry:
575  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
576  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
577  %ext = zext i32 %src2 to i64
578  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
579  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
580  %out = mul <4 x i64> %shuf2, %out1
581  ret <4 x i64> %out
582}
583
; 16-bit variant: multiplies the sign-extended lanes 0, 2, 4, 6 of an <8 x i16>
; %src1 by a splat of the i16 %src2, producing <4 x i32>. The splat is built in
; the <8 x i16> domain and sign-extended afterwards; the shuffled %src1 value is
; the first operand of the mul.
; Expected output: vdup.32 of r0 into q1 followed by a single vmullb.s16.
584define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
585; CHECK-LABEL: sext16_02468101214_0ext:
586; CHECK:       @ %bb.0: @ %entry
587; CHECK-NEXT:    vdup.32 q1, r0
588; CHECK-NEXT:    vmullb.s16 q0, q0, q1
589; CHECK-NEXT:    bx lr
590entry:
591  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
592  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
593  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
594  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
595  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
596  %out = mul <4 x i32> %out1, %out2
597  ret <4 x i32> %out
598}
599
; 16-bit variant: multiplies a splat of the i16 %src2 by the sign-extended lanes
; 0, 2, 4, 6 of an <8 x i16> %src1, producing <4 x i32>. The splat is built in
; the <8 x i16> domain and sign-extended afterwards; the splat is the first
; operand of the mul.
; Expected output: vdup.32 of r0 into q1 followed by a single vmullb.s16 with
; the splat register as its first source.
600define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
601; CHECK-LABEL: sext16_0ext_02468101214:
602; CHECK:       @ %bb.0: @ %entry
603; CHECK-NEXT:    vdup.32 q1, r0
604; CHECK-NEXT:    vmullb.s16 q0, q1, q0
605; CHECK-NEXT:    bx lr
606entry:
607  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
608  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
609  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
610  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
611  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
612  %out = mul <4 x i32> %out2, %out1
613  ret <4 x i32> %out
614}
615
; 16-bit variant: multiplies the sign-extended lanes 0, 2, 4, 6 of an <8 x i16>
; %src1 by a splat of %src2, where %src2 is sign-extended to i32 as a scalar
; first and then splatted in the <4 x i32> domain. The shuffled %src1 value is
; the first operand of the mul.
; Expected output: no vmullb is used. The vector is widened with vmovlb.s16, r0
; is sign-extended with sxth, and a vmul.i32 takes r0 as its scalar operand.
616define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
617; CHECK-LABEL: sext16_02468101214_ext0:
618; CHECK:       @ %bb.0: @ %entry
619; CHECK-NEXT:    vmovlb.s16 q0, q0
620; CHECK-NEXT:    sxth r0, r0
621; CHECK-NEXT:    vmul.i32 q0, q0, r0
622; CHECK-NEXT:    bx lr
623entry:
624  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
625  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
626  %ext = sext i16 %src2 to i32
627  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
628  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
629  %out = mul <4 x i32> %out1, %shuf2
630  ret <4 x i32> %out
631}
632
; 16-bit variant: multiplies a splat of %src2 by the sign-extended lanes 0, 2,
; 4, 6 of an <8 x i16> %src1. %src2 is sign-extended to i32 as a scalar first
; and then splatted in the <4 x i32> domain; the splat is the first operand of
; the mul.
; Expected output: identical in shape to the operand-swapped case in the IR:
; vmovlb.s16, sxth on r0, then a vmul.i32 taking r0 as its scalar operand.
633define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
634; CHECK-LABEL: sext16_ext0_02468101214:
635; CHECK:       @ %bb.0: @ %entry
636; CHECK-NEXT:    vmovlb.s16 q0, q0
637; CHECK-NEXT:    sxth r0, r0
638; CHECK-NEXT:    vmul.i32 q0, q0, r0
639; CHECK-NEXT:    bx lr
640entry:
641  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
642  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
643  %ext = sext i16 %src2 to i32
644  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
645  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
646  %out = mul <4 x i32> %shuf2, %out1
647  ret <4 x i32> %out
648}
649
; 16-bit variant: multiplies the sign-extended lanes 1, 3, 5, 7 of an <8 x i16>
; %src1 by a splat of the i16 %src2, producing <4 x i32>. The splat is built in
; the <8 x i16> domain and sign-extended afterwards; the shuffled %src1 value is
; the first operand of the mul.
; Expected output: vdup.32 of r0 into q1, a vrev32.16 on %src1, and a single
; vmullb.s16.
650define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
651; CHECK-LABEL: sext16_13579111315_0ext:
652; CHECK:       @ %bb.0: @ %entry
653; CHECK-NEXT:    vdup.32 q1, r0
654; CHECK-NEXT:    vrev32.16 q0, q0
655; CHECK-NEXT:    vmullb.s16 q0, q0, q1
656; CHECK-NEXT:    bx lr
657entry:
658  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
659  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
660  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
661  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
662  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
663  %out = mul <4 x i32> %out1, %out2
664  ret <4 x i32> %out
665}
666
; 16-bit variant: multiplies a splat of the i16 %src2 by the sign-extended lanes
; 1, 3, 5, 7 of an <8 x i16> %src1, producing <4 x i32>. The splat is built in
; the <8 x i16> domain and sign-extended afterwards; the splat is the first
; operand of the mul.
; Expected output: a vrev32.16 on %src1, vdup.32 of r0 into q1, and a single
; vmullb.s16 with the splat register as its first source.
667define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
668; CHECK-LABEL: sext16_0ext_13579111315:
669; CHECK:       @ %bb.0: @ %entry
670; CHECK-NEXT:    vrev32.16 q0, q0
671; CHECK-NEXT:    vdup.32 q1, r0
672; CHECK-NEXT:    vmullb.s16 q0, q1, q0
673; CHECK-NEXT:    bx lr
674entry:
675  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
676  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
677  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
678  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
679  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
680  %out = mul <4 x i32> %out2, %out1
681  ret <4 x i32> %out
682}
683
; 16-bit variant: multiplies the sign-extended lanes 1, 3, 5, 7 of an <8 x i16>
; %src1 by a splat of %src2, where %src2 is sign-extended to i32 as a scalar
; first and then splatted in the <4 x i32> domain. The shuffled %src1 value is
; the first operand of the mul.
; Expected output: no vmullb is used. The vector is widened with vmovlt.s16, r0
; is sign-extended with sxth, and a vmul.i32 takes r0 as its scalar operand.
684define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
685; CHECK-LABEL: sext16_13579111315_ext0:
686; CHECK:       @ %bb.0: @ %entry
687; CHECK-NEXT:    vmovlt.s16 q0, q0
688; CHECK-NEXT:    sxth r0, r0
689; CHECK-NEXT:    vmul.i32 q0, q0, r0
690; CHECK-NEXT:    bx lr
691entry:
692  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
693  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
694  %ext = sext i16 %src2 to i32
695  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
696  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
697  %out = mul <4 x i32> %out1, %shuf2
698  ret <4 x i32> %out
699}
700
; 16-bit variant: multiplies a splat of %src2 by the sign-extended lanes 1, 3,
; 5, 7 of an <8 x i16> %src1. %src2 is sign-extended to i32 as a scalar first
; and then splatted in the <4 x i32> domain; the splat is the first operand of
; the mul.
; Expected output: no vmullb is used. The vector is widened with vmovlt.s16, r0
; is sign-extended with sxth, and a vmul.i32 takes r0 as its scalar operand.
701define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
702; CHECK-LABEL: sext16_ext0_13579111315:
703; CHECK:       @ %bb.0: @ %entry
704; CHECK-NEXT:    vmovlt.s16 q0, q0
705; CHECK-NEXT:    sxth r0, r0
706; CHECK-NEXT:    vmul.i32 q0, q0, r0
707; CHECK-NEXT:    bx lr
708entry:
709  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
710  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
711  %ext = sext i16 %src2 to i32
712  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
713  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
714  %out = mul <4 x i32> %shuf2, %out1
715  ret <4 x i32> %out
716}
717
; Wide 16-bit variant: selects lanes 0, 2, 4, 6, 1, 3, 5, 7 (in that order) from
; a <16 x i16> %src1, sign-extends them to <8 x i32> and multiplies by a splat
; of the i16 %src2. The splat is built in the <16 x i16> domain and
; sign-extended afterwards; the shuffled %src1 value is the first operand.
; Expected output: vdup.16 of r0 into q2, a vrev32.16 of q0 into q1, and two
; vmullb.s16 instructions (one on the reversed copy, one on q0 itself).
718define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
719; CHECK-LABEL: sext16_02461357_0ext:
720; CHECK:       @ %bb.0: @ %entry
721; CHECK-NEXT:    vdup.16 q2, r0
722; CHECK-NEXT:    vrev32.16 q1, q0
723; CHECK-NEXT:    vmullb.s16 q1, q1, q2
724; CHECK-NEXT:    vmullb.s16 q0, q0, q2
725; CHECK-NEXT:    bx lr
726entry:
727  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
728  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
729  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
730  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
731  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
732  %out = mul <8 x i32> %out1, %out2
733  ret <8 x i32> %out
734}
735
; Wide 16-bit variant: multiplies a splat of the i16 %src2 by lanes 0, 2, 4, 6,
; 1, 3, 5, 7 (in that order) of a <16 x i16> %src1, both sign-extended to
; <8 x i32>. The splat is built in the <16 x i16> domain and sign-extended
; afterwards; the splat is the first operand of the mul.
; Expected output: a vrev32.16 of q0 into q1, vdup.16 of r0 into q2, and two
; vmullb.s16 instructions with the splat register as the first source.
736define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
737; CHECK-LABEL: sext16_0ext_02461357:
738; CHECK:       @ %bb.0: @ %entry
739; CHECK-NEXT:    vrev32.16 q1, q0
740; CHECK-NEXT:    vdup.16 q2, r0
741; CHECK-NEXT:    vmullb.s16 q1, q2, q1
742; CHECK-NEXT:    vmullb.s16 q0, q2, q0
743; CHECK-NEXT:    bx lr
744entry:
745  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
746  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
747  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
748  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
749  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
750  %out = mul <8 x i32> %out2, %out1
751  ret <8 x i32> %out
752}
753
; Wide 16-bit variant: lanes 0, 2, 4, 6, 1, 3, 5, 7 (in that order) of a
; <16 x i16> %src1 are sign-extended to <8 x i32> and multiplied by a splat of
; %src2, where %src2 is sign-extended to i32 as a scalar first and then splatted
; in the <8 x i32> domain. The shuffled %src1 value is the first operand.
; Expected output: no vmullb is used. q0 is widened with vmovlb.s16 and
; vmovlt.s16, r0 is sign-extended with sxth, and two vmul.i32 instructions take
; r0 as their scalar operand; the first result is copied from q2 into q0.
754define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
755; CHECK-LABEL: sext16_02461357_ext0:
756; CHECK:       @ %bb.0: @ %entry
757; CHECK-NEXT:    vmovlb.s16 q1, q0
758; CHECK-NEXT:    sxth r0, r0
759; CHECK-NEXT:    vmul.i32 q2, q1, r0
760; CHECK-NEXT:    vmovlt.s16 q0, q0
761; CHECK-NEXT:    vmul.i32 q1, q0, r0
762; CHECK-NEXT:    vmov q0, q2
763; CHECK-NEXT:    bx lr
764entry:
765  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
766  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
767  %ext = sext i16 %src2 to i32
768  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
769  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
770  %out = mul <8 x i32> %out1, %shuf2
771  ret <8 x i32> %out
772}
773
; Signed i16 -> i32 multiply with the splat as the FIRST mul operand; the
; scalar is sign-extended to i32 before being splatted into <8 x i32>.
; %src1 lanes <0,2,4,6,1,3,5,7> are sign-extended and form the second operand.
; Expected assembly: vmovlb.s16 / vmovlt.s16 plus vmul.i32 by the
; sxth-extended r0; no vmullb is expected.
774define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
775; CHECK-LABEL: sext16_ext0_02461357:
776; CHECK:       @ %bb.0: @ %entry
777; CHECK-NEXT:    vmovlb.s16 q1, q0
778; CHECK-NEXT:    sxth r0, r0
779; CHECK-NEXT:    vmul.i32 q2, q1, r0
780; CHECK-NEXT:    vmovlt.s16 q0, q0
781; CHECK-NEXT:    vmul.i32 q1, q0, r0
782; CHECK-NEXT:    vmov q0, q2
783; CHECK-NEXT:    bx lr
784entry:
785  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
786  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
787  %ext = sext i16 %src2 to i32
788  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
789  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
790  %out = mul <8 x i32> %shuf2, %out1
791  ret <8 x i32> %out
792}
793
; Unsigned i16 -> i32 widening multiply of the even lanes <0,2,4,6> of %src1
; by a splat of %src2. The splat is built at i16 width (insertelement +
; zero-mask shuffle) and zero-extended afterwards; vector is the first operand.
; Expected assembly: vdup.32 of r0 followed by a single vmullb.u16.
794define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
795; CHECK-LABEL: zext16_02468101214_0ext:
796; CHECK:       @ %bb.0: @ %entry
797; CHECK-NEXT:    vdup.32 q1, r0
798; CHECK-NEXT:    vmullb.u16 q0, q0, q1
799; CHECK-NEXT:    bx lr
800entry:
801  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
802  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
803  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
804  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
805  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
806  %out = mul <4 x i32> %out1, %out2
807  ret <4 x i32> %out
808}
809
; Unsigned i16 -> i32 widening multiply with the splat as the FIRST operand.
; Even lanes <0,2,4,6> of %src1 are zero-extended; %src2 is splatted at i16
; width and the splat vector is zero-extended afterwards.
; Expected assembly: vdup.32 of r0, then vmullb.u16 with the splat register
; as the first source.
810define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
811; CHECK-LABEL: zext16_0ext_02468101214:
812; CHECK:       @ %bb.0: @ %entry
813; CHECK-NEXT:    vdup.32 q1, r0
814; CHECK-NEXT:    vmullb.u16 q0, q1, q0
815; CHECK-NEXT:    bx lr
816entry:
817  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
818  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
819  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
820  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
821  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
822  %out = mul <4 x i32> %out2, %out1
823  ret <4 x i32> %out
824}
825
; Unsigned i16 -> i32 multiply where the scalar is zero-extended to i32
; BEFORE being splatted into <4 x i32>. Even lanes <0,2,4,6> of %src1 are
; zero-extended and form the first operand.
; Expected assembly: vmovlb.u16 widens the vector, uxth extends r0, and
; vmul.i32 multiplies by the scalar register; no vmullb is expected.
826define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
827; CHECK-LABEL: zext16_02468101214_ext0:
828; CHECK:       @ %bb.0: @ %entry
829; CHECK-NEXT:    vmovlb.u16 q0, q0
830; CHECK-NEXT:    uxth r0, r0
831; CHECK-NEXT:    vmul.i32 q0, q0, r0
832; CHECK-NEXT:    bx lr
833entry:
834  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
835  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
836  %ext = zext i16 %src2 to i32
837  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
838  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
839  %out = mul <4 x i32> %out1, %shuf2
840  ret <4 x i32> %out
841}
842
; Unsigned i16 -> i32 multiply with the splat as the FIRST operand; the
; scalar is zero-extended to i32 before being splatted into <4 x i32>.
; Even lanes <0,2,4,6> of %src1 are zero-extended and form the second operand.
; Expected assembly: vmovlb.u16, uxth on r0, then vmul.i32 by the scalar
; register; no vmullb is expected.
843define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
844; CHECK-LABEL: zext16_ext0_02468101214:
845; CHECK:       @ %bb.0: @ %entry
846; CHECK-NEXT:    vmovlb.u16 q0, q0
847; CHECK-NEXT:    uxth r0, r0
848; CHECK-NEXT:    vmul.i32 q0, q0, r0
849; CHECK-NEXT:    bx lr
850entry:
851  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
852  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
853  %ext = zext i16 %src2 to i32
854  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
855  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
856  %out = mul <4 x i32> %shuf2, %out1
857  ret <4 x i32> %out
858}
859
; Unsigned i16 -> i32 widening multiply of the odd lanes <1,3,5,7> of %src1
; by a splat of %src2. The splat is built at i16 width and zero-extended
; afterwards; the vector is the first operand.
; Expected assembly: vdup.32 of r0, vrev32.16 on the vector, then a single
; vmullb.u16.
860define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
861; CHECK-LABEL: zext16_13579111315_0ext:
862; CHECK:       @ %bb.0: @ %entry
863; CHECK-NEXT:    vdup.32 q1, r0
864; CHECK-NEXT:    vrev32.16 q0, q0
865; CHECK-NEXT:    vmullb.u16 q0, q0, q1
866; CHECK-NEXT:    bx lr
867entry:
868  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
869  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
870  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
871  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
872  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
873  %out = mul <4 x i32> %out1, %out2
874  ret <4 x i32> %out
875}
876
; Unsigned i16 -> i32 widening multiply with the splat as the FIRST operand.
; Odd lanes <1,3,5,7> of %src1 are zero-extended; %src2 is splatted at i16
; width and the splat vector is zero-extended afterwards.
; Expected assembly: vrev32.16 on the vector, vdup.32 of r0, then vmullb.u16
; with the splat register as the first source.
877define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
878; CHECK-LABEL: zext16_0ext_13579111315:
879; CHECK:       @ %bb.0: @ %entry
880; CHECK-NEXT:    vrev32.16 q0, q0
881; CHECK-NEXT:    vdup.32 q1, r0
882; CHECK-NEXT:    vmullb.u16 q0, q1, q0
883; CHECK-NEXT:    bx lr
884entry:
885  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
886  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
887  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
888  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
889  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
890  %out = mul <4 x i32> %out2, %out1
891  ret <4 x i32> %out
892}
893
; Unsigned i16 -> i32 multiply where the scalar is zero-extended to i32
; BEFORE being splatted into <4 x i32>. Odd lanes <1,3,5,7> of %src1 are
; zero-extended and form the first operand.
; Expected assembly: vmovlt.u16 widens the odd lanes, uxth extends r0, and
; vmul.i32 multiplies by the scalar register; no vmullb is expected.
894define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
895; CHECK-LABEL: zext16_13579111315_ext0:
896; CHECK:       @ %bb.0: @ %entry
897; CHECK-NEXT:    vmovlt.u16 q0, q0
898; CHECK-NEXT:    uxth r0, r0
899; CHECK-NEXT:    vmul.i32 q0, q0, r0
900; CHECK-NEXT:    bx lr
901entry:
902  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
903  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
904  %ext = zext i16 %src2 to i32
905  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
906  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
907  %out = mul <4 x i32> %out1, %shuf2
908  ret <4 x i32> %out
909}
910
; Unsigned i16 -> i32 multiply with the splat as the FIRST operand; the
; scalar is zero-extended to i32 before being splatted into <4 x i32>.
; Odd lanes <1,3,5,7> of %src1 are zero-extended and form the second operand.
; Expected assembly: vmovlt.u16, uxth on r0, then vmul.i32 by the scalar
; register; no vmullb is expected.
911define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
912; CHECK-LABEL: zext16_ext0_13579111315:
913; CHECK:       @ %bb.0: @ %entry
914; CHECK-NEXT:    vmovlt.u16 q0, q0
915; CHECK-NEXT:    uxth r0, r0
916; CHECK-NEXT:    vmul.i32 q0, q0, r0
917; CHECK-NEXT:    bx lr
918entry:
919  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
920  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
921  %ext = zext i16 %src2 to i32
922  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
923  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
924  %out = mul <4 x i32> %shuf2, %out1
925  ret <4 x i32> %out
926}
927
; Unsigned i16 -> i32 widening multiply producing a two-register <8 x i32>.
; %src1 lanes <0,2,4,6,1,3,5,7> of the <16 x i16> input are zero-extended and
; form the first operand; %src2 is splatted at i16 width and the splat vector
; is zero-extended afterwards.
; Expected assembly: vdup.16 and vrev32.16 feeding two vmullb.u16.
928define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
929; CHECK-LABEL: zext16_02461357_0ext:
930; CHECK:       @ %bb.0: @ %entry
931; CHECK-NEXT:    vdup.16 q2, r0
932; CHECK-NEXT:    vrev32.16 q1, q0
933; CHECK-NEXT:    vmullb.u16 q1, q1, q2
934; CHECK-NEXT:    vmullb.u16 q0, q0, q2
935; CHECK-NEXT:    bx lr
936entry:
937  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
938  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
939  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
940  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
941  %out2 = zext <8 x i16> %shuf2 to <8 x i32>
942  %out = mul <8 x i32> %out1, %out2
943  ret <8 x i32> %out
944}
945
; Unsigned i16 -> i32 widening multiply with the splat as the FIRST operand.
; %src1 lanes <0,2,4,6,1,3,5,7> of the <16 x i16> input are zero-extended;
; %src2 is splatted at i16 width and the splat vector is zero-extended
; afterwards.
; Expected assembly: vrev32.16 and vdup.16 feeding two vmullb.u16 that take
; the splat register as their first source.
946define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
947; CHECK-LABEL: zext16_0ext_02461357:
948; CHECK:       @ %bb.0: @ %entry
949; CHECK-NEXT:    vrev32.16 q1, q0
950; CHECK-NEXT:    vdup.16 q2, r0
951; CHECK-NEXT:    vmullb.u16 q1, q2, q1
952; CHECK-NEXT:    vmullb.u16 q0, q2, q0
953; CHECK-NEXT:    bx lr
954entry:
955  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
956  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
957  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
958  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
959  %out2 = zext <8 x i16> %shuf2 to <8 x i32>
960  %out = mul <8 x i32> %out2, %out1
961  ret <8 x i32> %out
962}
963
; Unsigned i16 -> i32 multiply where the scalar is zero-extended to i32
; BEFORE being splatted into <8 x i32>. %src1 lanes <0,2,4,6,1,3,5,7> are
; zero-extended and form the first operand.
; Expected assembly: vmovlb.u16 / vmovlt.u16 widen the vector halves and
; vmul.i32 multiplies each by the uxth-extended r0; no vmullb is expected.
964define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
965; CHECK-LABEL: zext16_02461357_ext0:
966; CHECK:       @ %bb.0: @ %entry
967; CHECK-NEXT:    vmovlb.u16 q1, q0
968; CHECK-NEXT:    uxth r0, r0
969; CHECK-NEXT:    vmul.i32 q2, q1, r0
970; CHECK-NEXT:    vmovlt.u16 q0, q0
971; CHECK-NEXT:    vmul.i32 q1, q0, r0
972; CHECK-NEXT:    vmov q0, q2
973; CHECK-NEXT:    bx lr
974entry:
975  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
976  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
977  %ext = zext i16 %src2 to i32
978  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
979  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
980  %out = mul <8 x i32> %out1, %shuf2
981  ret <8 x i32> %out
982}
983
; Unsigned i16 -> i32 multiply with the splat as the FIRST operand; the
; scalar is zero-extended to i32 before being splatted into <8 x i32>.
; %src1 lanes <0,2,4,6,1,3,5,7> are zero-extended and form the second operand.
; Expected assembly: vmovlb.u16 / vmovlt.u16 plus vmul.i32 by the
; uxth-extended r0; no vmullb is expected.
984define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
985; CHECK-LABEL: zext16_ext0_02461357:
986; CHECK:       @ %bb.0: @ %entry
987; CHECK-NEXT:    vmovlb.u16 q1, q0
988; CHECK-NEXT:    uxth r0, r0
989; CHECK-NEXT:    vmul.i32 q2, q1, r0
990; CHECK-NEXT:    vmovlt.u16 q0, q0
991; CHECK-NEXT:    vmul.i32 q1, q0, r0
992; CHECK-NEXT:    vmov q0, q2
993; CHECK-NEXT:    bx lr
994entry:
995  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
996  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
997  %ext = zext i16 %src2 to i32
998  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
999  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
1000  %out = mul <8 x i32> %shuf2, %out1
1001  ret <8 x i32> %out
1002}
1003
; Signed i8 -> i16 widening multiply of the even lanes <0,2,...,14> of %src1
; by a splat of %src2. The splat is built at i8 width (insertelement +
; zero-mask shuffle) and sign-extended afterwards; vector is the first operand.
; Expected assembly: vdup.16 of r0 followed by a single vmullb.s8.
1004define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
1005; CHECK-LABEL: sext8_024681012141618202224262830_0ext:
1006; CHECK:       @ %bb.0: @ %entry
1007; CHECK-NEXT:    vdup.16 q1, r0
1008; CHECK-NEXT:    vmullb.s8 q0, q0, q1
1009; CHECK-NEXT:    bx lr
1010entry:
1011  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1012  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1013  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1014  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1015  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1016  %out = mul <8 x i16> %out1, %out2
1017  ret <8 x i16> %out
1018}
1019
; Signed i8 -> i16 widening multiply with the splat as the FIRST operand.
; Even lanes <0,2,...,14> of %src1 are sign-extended; %src2 is splatted at
; i8 width and the splat vector is sign-extended afterwards.
; Expected assembly: vdup.16 of r0, then vmullb.s8 with the splat register
; as the first source.
1020define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1021; CHECK-LABEL: sext8_0ext_024681012141618202224262830:
1022; CHECK:       @ %bb.0: @ %entry
1023; CHECK-NEXT:    vdup.16 q1, r0
1024; CHECK-NEXT:    vmullb.s8 q0, q1, q0
1025; CHECK-NEXT:    bx lr
1026entry:
1027  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1028  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1029  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1030  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1031  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1032  %out = mul <8 x i16> %out2, %out1
1033  ret <8 x i16> %out
1034}
1035
; Signed i8 -> i16 multiply where the scalar is sign-extended to i16 BEFORE
; being splatted into <8 x i16>. Even lanes <0,2,...,14> of %src1 are
; sign-extended and form the first operand.
; Expected assembly: vmovlb.s8 widens the vector, sxtb extends r0, and
; vmul.i16 multiplies by the scalar register; no vmullb is expected.
1036define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1037; CHECK-LABEL: sext8_024681012141618202224262830_ext0:
1038; CHECK:       @ %bb.0: @ %entry
1039; CHECK-NEXT:    vmovlb.s8 q0, q0
1040; CHECK-NEXT:    sxtb r0, r0
1041; CHECK-NEXT:    vmul.i16 q0, q0, r0
1042; CHECK-NEXT:    bx lr
1043entry:
1044  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1045  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1046  %ext = sext i8 %src2 to i16
1047  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1048  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1049  %out = mul <8 x i16> %out1, %shuf2
1050  ret <8 x i16> %out
1051}
1052
; Signed i8 -> i16 multiply with the splat as the FIRST operand; the scalar
; is sign-extended to i16 before being splatted into <8 x i16>.
; Even lanes <0,2,...,14> of %src1 are sign-extended and form the second
; operand.
; Expected assembly: vmovlb.s8, sxtb on r0, then vmul.i16 by the scalar
; register; no vmullb is expected.
1053define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1054; CHECK-LABEL: sext8_ext0_024681012141618202224262830:
1055; CHECK:       @ %bb.0: @ %entry
1056; CHECK-NEXT:    vmovlb.s8 q0, q0
1057; CHECK-NEXT:    sxtb r0, r0
1058; CHECK-NEXT:    vmul.i16 q0, q0, r0
1059; CHECK-NEXT:    bx lr
1060entry:
1061  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1062  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1063  %ext = sext i8 %src2 to i16
1064  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1065  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1066  %out = mul <8 x i16> %shuf2, %out1
1067  ret <8 x i16> %out
1068}
1069
; Signed i8 -> i16 widening multiply of the odd lanes <1,3,...,15> of %src1
; by a splat of %src2. The splat is built at i8 width and sign-extended
; afterwards; the vector is the first operand.
; Expected assembly: vdup.16 of r0, vrev16.8 on the vector, then a single
; vmullb.s8.
1070define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1071; CHECK-LABEL: sext8_135791113151719212325272931_0ext:
1072; CHECK:       @ %bb.0: @ %entry
1073; CHECK-NEXT:    vdup.16 q1, r0
1074; CHECK-NEXT:    vrev16.8 q0, q0
1075; CHECK-NEXT:    vmullb.s8 q0, q0, q1
1076; CHECK-NEXT:    bx lr
1077entry:
1078  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1079  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1080  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1081  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1082  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1083  %out = mul <8 x i16> %out1, %out2
1084  ret <8 x i16> %out
1085}
1086
; Signed i8 -> i16 widening multiply with the splat as the FIRST operand.
; Odd lanes <1,3,...,15> of %src1 are sign-extended; %src2 is splatted at
; i8 width and the splat vector is sign-extended afterwards.
; Expected assembly: vrev16.8 on the vector, vdup.16 of r0, then vmullb.s8
; with the splat register as the first source.
1087define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1088; CHECK-LABEL: sext8_0ext_135791113151719212325272931:
1089; CHECK:       @ %bb.0: @ %entry
1090; CHECK-NEXT:    vrev16.8 q0, q0
1091; CHECK-NEXT:    vdup.16 q1, r0
1092; CHECK-NEXT:    vmullb.s8 q0, q1, q0
1093; CHECK-NEXT:    bx lr
1094entry:
1095  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1096  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1097  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1098  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1099  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1100  %out = mul <8 x i16> %out2, %out1
1101  ret <8 x i16> %out
1102}
1103
; Signed i8 -> i16 multiply where the scalar is sign-extended to i16 BEFORE
; being splatted into <8 x i16>. Odd lanes <1,3,...,15> of %src1 are
; sign-extended and form the first operand.
; Expected assembly: vmovlt.s8 widens the odd lanes, sxtb extends r0, and
; vmul.i16 multiplies by the scalar register; no vmullb is expected.
1104define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1105; CHECK-LABEL: sext8_135791113151719212325272931_ext0:
1106; CHECK:       @ %bb.0: @ %entry
1107; CHECK-NEXT:    vmovlt.s8 q0, q0
1108; CHECK-NEXT:    sxtb r0, r0
1109; CHECK-NEXT:    vmul.i16 q0, q0, r0
1110; CHECK-NEXT:    bx lr
1111entry:
1112  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1113  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1114  %ext = sext i8 %src2 to i16
1115  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1116  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1117  %out = mul <8 x i16> %out1, %shuf2
1118  ret <8 x i16> %out
1119}
1120
; Signed i8 -> i16 multiply with the splat as the FIRST operand; the scalar
; is sign-extended to i16 before being splatted into <8 x i16>.
; Odd lanes <1,3,...,15> of %src1 are sign-extended and form the second
; operand.
; Expected assembly: vmovlt.s8, sxtb on r0, then vmul.i16 by the scalar
; register; no vmullb is expected.
1121define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1122; CHECK-LABEL: sext8_ext0_135791113151719212325272931:
1123; CHECK:       @ %bb.0: @ %entry
1124; CHECK-NEXT:    vmovlt.s8 q0, q0
1125; CHECK-NEXT:    sxtb r0, r0
1126; CHECK-NEXT:    vmul.i16 q0, q0, r0
1127; CHECK-NEXT:    bx lr
1128entry:
1129  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1130  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1131  %ext = sext i8 %src2 to i16
1132  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1133  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1134  %out = mul <8 x i16> %shuf2, %out1
1135  ret <8 x i16> %out
1136}
1137
; Signed i8 -> i16 widening multiply producing a two-register <16 x i16>.
; The shuffle takes the even lanes <0,2,...,14> followed by the odd lanes
; <1,3,...,15> of the <32 x i8> input; the result is sign-extended and used
; as the first operand. %src2 is splatted at i8 width and the splat vector is
; sign-extended afterwards.
; Expected assembly: vdup.8 and vrev16.8 feeding two vmullb.s8.
1138define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1139; CHECK-LABEL: sext8_0246810121413579111315_0ext:
1140; CHECK:       @ %bb.0: @ %entry
1141; CHECK-NEXT:    vdup.8 q2, r0
1142; CHECK-NEXT:    vrev16.8 q1, q0
1143; CHECK-NEXT:    vmullb.s8 q1, q1, q2
1144; CHECK-NEXT:    vmullb.s8 q0, q0, q2
1145; CHECK-NEXT:    bx lr
1146entry:
1147  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1148  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1149  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1150  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1151  %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1152  %out = mul <16 x i16> %out1, %out2
1153  ret <16 x i16> %out
1154}
1155
; Signed i8 -> i16 widening multiply with the splat as the FIRST operand.
; The shuffle takes the even lanes <0,2,...,14> followed by the odd lanes
; <1,3,...,15> of the <32 x i8> input and the result is sign-extended.
; %src2 is splatted at i8 width and the splat vector is sign-extended
; afterwards.
; Expected assembly: vrev16.8 and vdup.8 feeding two vmullb.s8 that take the
; splat register as their first source.
1156define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1157; CHECK-LABEL: sext8_0ext_0246810121413579111315:
1158; CHECK:       @ %bb.0: @ %entry
1159; CHECK-NEXT:    vrev16.8 q1, q0
1160; CHECK-NEXT:    vdup.8 q2, r0
1161; CHECK-NEXT:    vmullb.s8 q1, q2, q1
1162; CHECK-NEXT:    vmullb.s8 q0, q2, q0
1163; CHECK-NEXT:    bx lr
1164entry:
1165  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1166  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1167  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1168  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1169  %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1170  %out = mul <16 x i16> %out2, %out1
1171  ret <16 x i16> %out
1172}
1173
; Signed i8 -> i16 multiply where the scalar is sign-extended to i16 BEFORE
; being splatted into <16 x i16>. The shuffle takes the even lanes
; <0,2,...,14> followed by the odd lanes <1,3,...,15> of the <32 x i8> input;
; the sign-extended result is the first operand.
; Expected assembly: vmovlb.s8 / vmovlt.s8 widen the vector halves and
; vmul.i16 multiplies each by the sxtb-extended r0; no vmullb is expected.
1174define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1175; CHECK-LABEL: sext8_0246810121413579111315_ext0:
1176; CHECK:       @ %bb.0: @ %entry
1177; CHECK-NEXT:    vmovlb.s8 q1, q0
1178; CHECK-NEXT:    sxtb r0, r0
1179; CHECK-NEXT:    vmul.i16 q2, q1, r0
1180; CHECK-NEXT:    vmovlt.s8 q0, q0
1181; CHECK-NEXT:    vmul.i16 q1, q0, r0
1182; CHECK-NEXT:    vmov q0, q2
1183; CHECK-NEXT:    bx lr
1184entry:
1185  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1186  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1187  %ext = sext i8 %src2 to i16
1188  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1189  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1190  %out = mul <16 x i16> %out1, %shuf2
1191  ret <16 x i16> %out
1192}
1193
; Signed i8 -> i16 multiply with the splat as the FIRST operand; the scalar
; is sign-extended to i16 before being splatted into <16 x i16>.
; The shuffle takes the even lanes <0,2,...,14> followed by the odd lanes
; <1,3,...,15> of the <32 x i8> input; the sign-extended result is the second
; operand.
; Expected assembly: vmovlb.s8 / vmovlt.s8 plus vmul.i16 by the
; sxtb-extended r0; no vmullb is expected.
1194define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1195; CHECK-LABEL: sext8_ext0_0246810121413579111315:
1196; CHECK:       @ %bb.0: @ %entry
1197; CHECK-NEXT:    vmovlb.s8 q1, q0
1198; CHECK-NEXT:    sxtb r0, r0
1199; CHECK-NEXT:    vmul.i16 q2, q1, r0
1200; CHECK-NEXT:    vmovlt.s8 q0, q0
1201; CHECK-NEXT:    vmul.i16 q1, q0, r0
1202; CHECK-NEXT:    vmov q0, q2
1203; CHECK-NEXT:    bx lr
1204entry:
1205  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1206  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1207  %ext = sext i8 %src2 to i16
1208  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1209  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1210  %out = mul <16 x i16> %shuf2, %out1
1211  ret <16 x i16> %out
1212}
1213
; Unsigned i8 -> i16 widening multiply of the even lanes <0,2,...,14> of
; %src1 by a splat of %src2. The splat is built at i8 width (insertelement +
; zero-mask shuffle) and zero-extended afterwards; vector is the first operand.
; Expected assembly: vdup.16 of r0 followed by a single vmullb.u8.
1214define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
1215; CHECK-LABEL: zext8_024681012141618202224262830_0ext:
1216; CHECK:       @ %bb.0: @ %entry
1217; CHECK-NEXT:    vdup.16 q1, r0
1218; CHECK-NEXT:    vmullb.u8 q0, q0, q1
1219; CHECK-NEXT:    bx lr
1220entry:
1221  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1222  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1223  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1224  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1225  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1226  %out = mul <8 x i16> %out1, %out2
1227  ret <8 x i16> %out
1228}
1229
; Unsigned i8 -> i16 widening multiply with the splat as the FIRST operand.
; Even lanes <0,2,...,14> of %src1 are zero-extended; %src2 is splatted at
; i8 width and the splat vector is zero-extended afterwards.
; Expected assembly: vdup.16 of r0, then vmullb.u8 with the splat register
; as the first source.
1230define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1231; CHECK-LABEL: zext8_0ext_024681012141618202224262830:
1232; CHECK:       @ %bb.0: @ %entry
1233; CHECK-NEXT:    vdup.16 q1, r0
1234; CHECK-NEXT:    vmullb.u8 q0, q1, q0
1235; CHECK-NEXT:    bx lr
1236entry:
1237  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1238  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1239  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1240  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1241  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1242  %out = mul <8 x i16> %out2, %out1
1243  ret <8 x i16> %out
1244}
1245
; Unsigned i8 -> i16 multiply where the scalar is zero-extended to i16
; BEFORE being splatted into <8 x i16>. Even lanes <0,2,...,14> of %src1 are
; zero-extended and form the first operand.
; Expected assembly: vmovlb.u8 widens the vector, uxtb extends r0, and
; vmul.i16 multiplies by the scalar register; no vmullb is expected.
1246define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1247; CHECK-LABEL: zext8_024681012141618202224262830_ext0:
1248; CHECK:       @ %bb.0: @ %entry
1249; CHECK-NEXT:    vmovlb.u8 q0, q0
1250; CHECK-NEXT:    uxtb r0, r0
1251; CHECK-NEXT:    vmul.i16 q0, q0, r0
1252; CHECK-NEXT:    bx lr
1253entry:
1254  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1255  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1256  %ext = zext i8 %src2 to i16
1257  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1258  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1259  %out = mul <8 x i16> %out1, %shuf2
1260  ret <8 x i16> %out
1261}
1262
; Unsigned i8 -> i16 multiply with the splat as the FIRST operand; the
; scalar is zero-extended to i16 before being splatted into <8 x i16>.
; Even lanes <0,2,...,14> of %src1 are zero-extended and form the second
; operand.
; Expected assembly: vmovlb.u8, uxtb on r0, then vmul.i16 by the scalar
; register; no vmullb is expected.
1263define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1264; CHECK-LABEL: zext8_ext0_024681012141618202224262830:
1265; CHECK:       @ %bb.0: @ %entry
1266; CHECK-NEXT:    vmovlb.u8 q0, q0
1267; CHECK-NEXT:    uxtb r0, r0
1268; CHECK-NEXT:    vmul.i16 q0, q0, r0
1269; CHECK-NEXT:    bx lr
1270entry:
1271  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1272  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1273  %ext = zext i8 %src2 to i16
1274  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1275  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1276  %out = mul <8 x i16> %shuf2, %out1
1277  ret <8 x i16> %out
1278}
1279
; Unsigned i8 -> i16 widening multiply of the odd lanes <1,3,...,15> of
; %src1 by a splat of %src2. The splat is built at i8 width and zero-extended
; afterwards; the vector is the first operand.
; Expected assembly: vdup.16 of r0, vrev16.8 on the vector, then a single
; vmullb.u8.
1280define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1281; CHECK-LABEL: zext8_135791113151719212325272931_0ext:
1282; CHECK:       @ %bb.0: @ %entry
1283; CHECK-NEXT:    vdup.16 q1, r0
1284; CHECK-NEXT:    vrev16.8 q0, q0
1285; CHECK-NEXT:    vmullb.u8 q0, q0, q1
1286; CHECK-NEXT:    bx lr
1287entry:
1288  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1289  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1290  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1291  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1292  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1293  %out = mul <8 x i16> %out1, %out2
1294  ret <8 x i16> %out
1295}
1296
; Unsigned i8 -> i16 widening multiply with the splat as the FIRST operand.
; Odd lanes <1,3,...,15> of %src1 are zero-extended; %src2 is splatted at
; i8 width and the splat vector is zero-extended afterwards.
; Expected assembly: vrev16.8 on the vector, vdup.16 of r0, then vmullb.u8
; with the splat register as the first source.
1297define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1298; CHECK-LABEL: zext8_0ext_135791113151719212325272931:
1299; CHECK:       @ %bb.0: @ %entry
1300; CHECK-NEXT:    vrev16.8 q0, q0
1301; CHECK-NEXT:    vdup.16 q1, r0
1302; CHECK-NEXT:    vmullb.u8 q0, q1, q0
1303; CHECK-NEXT:    bx lr
1304entry:
1305  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1306  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1307  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1308  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1309  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1310  %out = mul <8 x i16> %out2, %out1
1311  ret <8 x i16> %out
1312}
1313
; Unsigned i8 -> i16 multiply where the scalar is zero-extended to i16
; BEFORE being splatted into <8 x i16>. Odd lanes <1,3,...,15> of %src1 are
; zero-extended and form the first operand.
; Expected assembly: vmovlt.u8 widens the odd lanes, uxtb extends r0, and
; vmul.i16 multiplies by the scalar register; no vmullb is expected.
1314define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1315; CHECK-LABEL: zext8_135791113151719212325272931_ext0:
1316; CHECK:       @ %bb.0: @ %entry
1317; CHECK-NEXT:    vmovlt.u8 q0, q0
1318; CHECK-NEXT:    uxtb r0, r0
1319; CHECK-NEXT:    vmul.i16 q0, q0, r0
1320; CHECK-NEXT:    bx lr
1321entry:
1322  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1323  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1324  %ext = zext i8 %src2 to i16
1325  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1326  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1327  %out = mul <8 x i16> %out1, %shuf2
1328  ret <8 x i16> %out
1329}
1330
; Unsigned i8 -> i16 multiply with the splat as the FIRST operand; the
; scalar is zero-extended to i16 before being splatted into <8 x i16>.
; Odd lanes <1,3,...,15> of %src1 are zero-extended and form the second
; operand.
; Expected assembly: vmovlt.u8, uxtb on r0, then vmul.i16 by the scalar
; register; no vmullb is expected.
1331define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1332; CHECK-LABEL: zext8_ext0_135791113151719212325272931:
1333; CHECK:       @ %bb.0: @ %entry
1334; CHECK-NEXT:    vmovlt.u8 q0, q0
1335; CHECK-NEXT:    uxtb r0, r0
1336; CHECK-NEXT:    vmul.i16 q0, q0, r0
1337; CHECK-NEXT:    bx lr
1338entry:
1339  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1340  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1341  %ext = zext i8 %src2 to i16
1342  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1343  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1344  %out = mul <8 x i16> %shuf2, %out1
1345  ret <8 x i16> %out
1346}
1347
; Unsigned i8 -> i16 widening multiply producing a two-register <16 x i16>.
; The shuffle takes the even lanes <0,2,...,14> followed by the odd lanes
; <1,3,...,15> of the <32 x i8> input; the result is zero-extended and used
; as the first operand. %src2 is splatted at i8 width and the splat vector is
; zero-extended afterwards.
; Expected assembly: vdup.8 and vrev16.8 feeding two vmullb.u8.
1348define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1349; CHECK-LABEL: zext8_0246810121413579111315_0ext:
1350; CHECK:       @ %bb.0: @ %entry
1351; CHECK-NEXT:    vdup.8 q2, r0
1352; CHECK-NEXT:    vrev16.8 q1, q0
1353; CHECK-NEXT:    vmullb.u8 q1, q1, q2
1354; CHECK-NEXT:    vmullb.u8 q0, q0, q2
1355; CHECK-NEXT:    bx lr
1356entry:
1357  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1358  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1359  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1360  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1361  %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1362  %out = mul <16 x i16> %out1, %out2
1363  ret <16 x i16> %out
1364}
1365
; Unsigned i8 -> i16 widening multiply with the splat as the FIRST operand.
; The shuffle takes the even lanes <0,2,...,14> followed by the odd lanes
; <1,3,...,15> of the <32 x i8> input and the result is zero-extended.
; %src2 is splatted at i8 width and the splat vector is zero-extended
; afterwards.
; Expected assembly: vrev16.8 and vdup.8 feeding two vmullb.u8 that take the
; splat register as their first source.
1366define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1367; CHECK-LABEL: zext8_0ext_0246810121413579111315:
1368; CHECK:       @ %bb.0: @ %entry
1369; CHECK-NEXT:    vrev16.8 q1, q0
1370; CHECK-NEXT:    vdup.8 q2, r0
1371; CHECK-NEXT:    vmullb.u8 q1, q2, q1
1372; CHECK-NEXT:    vmullb.u8 q0, q2, q0
1373; CHECK-NEXT:    bx lr
1374entry:
1375  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1376  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1377  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1378  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1379  %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1380  %out = mul <16 x i16> %out2, %out1
1381  ret <16 x i16> %out
1382}
1383
; Unsigned i8 -> i16 multiply where the scalar is zero-extended to i16
; BEFORE being splatted into <16 x i16>. The shuffle takes the even lanes
; <0,2,...,14> followed by the odd lanes <1,3,...,15> of the <32 x i8> input;
; the zero-extended result is the first operand.
; Expected assembly: vmovlb.u8 / vmovlt.u8 widen the vector halves and
; vmul.i16 multiplies each by the uxtb-extended r0; no vmullb is expected.
1384define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1385; CHECK-LABEL: zext8_0246810121413579111315_ext0:
1386; CHECK:       @ %bb.0: @ %entry
1387; CHECK-NEXT:    vmovlb.u8 q1, q0
1388; CHECK-NEXT:    uxtb r0, r0
1389; CHECK-NEXT:    vmul.i16 q2, q1, r0
1390; CHECK-NEXT:    vmovlt.u8 q0, q0
1391; CHECK-NEXT:    vmul.i16 q1, q0, r0
1392; CHECK-NEXT:    vmov q0, q2
1393; CHECK-NEXT:    bx lr
1394entry:
1395  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1396  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1397  %ext = zext i8 %src2 to i16
1398  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1399  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1400  %out = mul <16 x i16> %out1, %shuf2
1401  ret <16 x i16> %out
1402}
1403
; Unsigned i8 -> i16 multiply with the splat as the FIRST operand; the
; scalar is zero-extended to i16 before being splatted into <16 x i16>.
; The shuffle takes the even lanes <0,2,...,14> followed by the odd lanes
; <1,3,...,15> of the <32 x i8> input; the zero-extended result is the second
; operand.
; Expected assembly: vmovlb.u8 / vmovlt.u8 plus vmul.i16 by the
; uxtb-extended r0; no vmullb is expected.
1404define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1405; CHECK-LABEL: zext8_ext0_0246810121413579111315:
1406; CHECK:       @ %bb.0: @ %entry
1407; CHECK-NEXT:    vmovlb.u8 q1, q0
1408; CHECK-NEXT:    uxtb r0, r0
1409; CHECK-NEXT:    vmul.i16 q2, q1, r0
1410; CHECK-NEXT:    vmovlt.u8 q0, q0
1411; CHECK-NEXT:    vmul.i16 q1, q0, r0
1412; CHECK-NEXT:    vmov q0, q2
1413; CHECK-NEXT:    bx lr
1414entry:
1415  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1416  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1417  %ext = zext i8 %src2 to i16
1418  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1419  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1420  %out = mul <16 x i16> %shuf2, %out1
1421  ret <16 x i16> %out
1422}
1423