xref: /llvm-project/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll (revision 01c8cd664a9bea23a49c863a39351949ac11a4fd)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; Supported combines
6
; Splat of a sign-extended i8 scalar multiplied by a sign-extended <8 x i8>
; vector. The splat is built as insertelement into lane 0 followed by a
; zero-mask shufflevector. The SelectionDAG assertions expect the extend to be
; folded into a narrow dup feeding smull; the GlobalISel assertions show the
; operands being extended first and a plain mul being used.
7define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
8; CHECK-SD-LABEL: dupsext_v8i8_v8i16:
9; CHECK-SD:       // %bb.0: // %entry
10; CHECK-SD-NEXT:    dup v1.8b, w0
11; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
12; CHECK-SD-NEXT:    ret
13;
14; CHECK-GI-LABEL: dupsext_v8i8_v8i16:
15; CHECK-GI:       // %bb.0: // %entry
16; CHECK-GI-NEXT:    lsl w8, w0, #8
17; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
18; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
19; CHECK-GI-NEXT:    dup v1.8h, w8
20; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
21; CHECK-GI-NEXT:    ret
22entry:
23  %in = sext i8 %src to i16
24  %ext.b = sext <8 x i8> %b to <8 x i16>
25  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
26  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
27  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
28  ret <8 x i16> %out
29}
30
; Zero-extending variant of the i8 -> i16 splat multiply: both the scalar and
; the <8 x i8> vector are zext'ed and the multiply carries nuw. The
; SelectionDAG assertions expect a narrow dup feeding umull; the GlobalISel
; assertions show mask + ushll + dup + mul.
31define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
32; CHECK-SD-LABEL: dupzext_v8i8_v8i16:
33; CHECK-SD:       // %bb.0: // %entry
34; CHECK-SD-NEXT:    dup v1.8b, w0
35; CHECK-SD-NEXT:    umull v0.8h, v1.8b, v0.8b
36; CHECK-SD-NEXT:    ret
37;
38; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
39; CHECK-GI:       // %bb.0: // %entry
40; CHECK-GI-NEXT:    and w8, w0, #0xff
41; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
42; CHECK-GI-NEXT:    dup v1.8h, w8
43; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
44; CHECK-GI-NEXT:    ret
45entry:
46  %in = zext i8 %src to i16
47  %ext.b = zext <8 x i8> %b to <8 x i16>
48  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
49  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
50  %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
51  ret <8 x i16> %out
52}
53
; Splat of a sign-extended i16 scalar multiplied by a sign-extended
; <4 x i16> vector, producing <4 x i32>. The SelectionDAG assertions expect
; dup v1.4h + smull; the GlobalISel assertions show sxth + sshll + dup + mul.
54define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
55; CHECK-SD-LABEL: dupsext_v4i16_v4i32:
56; CHECK-SD:       // %bb.0: // %entry
57; CHECK-SD-NEXT:    dup v1.4h, w0
58; CHECK-SD-NEXT:    smull v0.4s, v1.4h, v0.4h
59; CHECK-SD-NEXT:    ret
60;
61; CHECK-GI-LABEL: dupsext_v4i16_v4i32:
62; CHECK-GI:       // %bb.0: // %entry
63; CHECK-GI-NEXT:    sxth w8, w0
64; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
65; CHECK-GI-NEXT:    dup v1.4s, w8
66; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
67; CHECK-GI-NEXT:    ret
68entry:
69  %in = sext i16 %src to i32
70  %ext.b = sext <4 x i16> %b to <4 x i32>
71  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
72  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
73  %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
74  ret <4 x i32> %out
75}
76
; Zero-extending variant of the i16 -> i32 splat multiply (mul nuw). The
; SelectionDAG assertions expect dup v1.4h + umull; the GlobalISel assertions
; show mask + ushll + dup + mul.
77define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
78; CHECK-SD-LABEL: dupzext_v4i16_v4i32:
79; CHECK-SD:       // %bb.0: // %entry
80; CHECK-SD-NEXT:    dup v1.4h, w0
81; CHECK-SD-NEXT:    umull v0.4s, v1.4h, v0.4h
82; CHECK-SD-NEXT:    ret
83;
84; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
85; CHECK-GI:       // %bb.0: // %entry
86; CHECK-GI-NEXT:    and w8, w0, #0xffff
87; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
88; CHECK-GI-NEXT:    dup v1.4s, w8
89; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
90; CHECK-GI-NEXT:    ret
91entry:
92  %in = zext i16 %src to i32
93  %ext.b = zext <4 x i16> %b to <4 x i32>
94  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
95  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
96  %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
97  ret <4 x i32> %out
98}
99
; Splat of a sign-extended i32 scalar multiplied by a sign-extended
; <2 x i32> vector, producing <2 x i64>. The SelectionDAG assertions expect
; dup v1.2s + smull. The GlobalISel assertions show the <2 x i64> multiply
; being done lane by lane in general-purpose registers (two scalar mul
; instructions, then reinserted into v0).
100define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
101; CHECK-SD-LABEL: dupsext_v2i32_v2i64:
102; CHECK-SD:       // %bb.0: // %entry
103; CHECK-SD-NEXT:    dup v1.2s, w0
104; CHECK-SD-NEXT:    smull v0.2d, v1.2s, v0.2s
105; CHECK-SD-NEXT:    ret
106;
107; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
108; CHECK-GI:       // %bb.0: // %entry
109; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
110; CHECK-GI-NEXT:    sxtw x8, w0
111; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
112; CHECK-GI-NEXT:    dup v1.2d, x8
113; CHECK-GI-NEXT:    fmov x9, d0
114; CHECK-GI-NEXT:    mov x11, v0.d[1]
115; CHECK-GI-NEXT:    fmov x8, d1
116; CHECK-GI-NEXT:    mov x10, v1.d[1]
117; CHECK-GI-NEXT:    mul x8, x8, x9
118; CHECK-GI-NEXT:    mul x9, x10, x11
119; CHECK-GI-NEXT:    mov v0.d[0], x8
120; CHECK-GI-NEXT:    mov v0.d[1], x9
121; CHECK-GI-NEXT:    ret
122entry:
123  %in = sext i32 %src to i64
124  %ext.b = sext <2 x i32> %b to <2 x i64>
125  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
126  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
127  %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
128  ret <2 x i64> %out
129}
130
; Zero-extending variant of the i32 -> i64 splat multiply (mul nuw). The
; SelectionDAG assertions expect dup v1.2s + umull. The GlobalISel assertions
; show the <2 x i64> multiply being done lane by lane in general-purpose
; registers.
131define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
132; CHECK-SD-LABEL: dupzext_v2i32_v2i64:
133; CHECK-SD:       // %bb.0: // %entry
134; CHECK-SD-NEXT:    dup v1.2s, w0
135; CHECK-SD-NEXT:    umull v0.2d, v1.2s, v0.2s
136; CHECK-SD-NEXT:    ret
137;
138; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
139; CHECK-GI:       // %bb.0: // %entry
140; CHECK-GI-NEXT:    mov w8, w0
141; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
142; CHECK-GI-NEXT:    dup v1.2d, x8
143; CHECK-GI-NEXT:    fmov x9, d0
144; CHECK-GI-NEXT:    mov x11, v0.d[1]
145; CHECK-GI-NEXT:    fmov x8, d1
146; CHECK-GI-NEXT:    mov x10, v1.d[1]
147; CHECK-GI-NEXT:    mul x8, x8, x9
148; CHECK-GI-NEXT:    mul x9, x10, x11
149; CHECK-GI-NEXT:    mov v0.d[0], x8
150; CHECK-GI-NEXT:    mov v0.d[1], x9
151; CHECK-GI-NEXT:    ret
152entry:
153  %in = zext i32 %src to i64
154  %ext.b = zext <2 x i32> %b to <2 x i64>
155  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
156  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
157  %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
158  ret <2 x i64> %out
159}
160
; Same zero-extending i32 -> i64 splat multiply as dupzext_v2i32_v2i64, but
; the splat is built from poison instead of undef and the <2 x i64> product is
; truncated back to <2 x i32>. Both multiply operands are zext'ed, so the
; SelectionDAG assertions expect the unsigned widening multiply (umull)
; followed by xtn, matching dupzext_v2i32_v2i64. The GlobalISel assertions
; show a lane-by-lane scalar multiply followed by xtn.
161define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
162; CHECK-SD-LABEL: dupzext_v2i32_v2i64_trunc:
163; CHECK-SD:       // %bb.0: // %entry
164; CHECK-SD-NEXT:    dup v1.2s, w0
165; CHECK-SD-NEXT:    umull v0.2d, v1.2s, v0.2s
166; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
167; CHECK-SD-NEXT:    ret
168;
169; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
170; CHECK-GI:       // %bb.0: // %entry
171; CHECK-GI-NEXT:    mov w8, w0
172; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
173; CHECK-GI-NEXT:    dup v1.2d, x8
174; CHECK-GI-NEXT:    fmov x9, d0
175; CHECK-GI-NEXT:    mov x11, v0.d[1]
176; CHECK-GI-NEXT:    fmov x8, d1
177; CHECK-GI-NEXT:    mov x10, v1.d[1]
178; CHECK-GI-NEXT:    mul x8, x8, x9
179; CHECK-GI-NEXT:    mul x9, x10, x11
180; CHECK-GI-NEXT:    mov v0.d[0], x8
181; CHECK-GI-NEXT:    mov v0.d[1], x9
182; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
183; CHECK-GI-NEXT:    ret
184entry:
185  %in = zext i32 %src to i64
186  %ext.b = zext <2 x i32> %b to <2 x i64>
187  %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %in, i64 0
188  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
189  %prod = mul nuw <2 x i64> %broadcast.splat, %ext.b
190  %out = trunc <2 x i64> %prod to <2 x i32>
191  ret <2 x i32> %out
192}
193
194; Unsupported combines
195
; Sign-extending splat multiply on the <2 x i8> -> <2 x i16> types. This sits
; in the "Unsupported combines" section: the assertions for both selectors
; show the extension being done explicitly (shl/sshr on .2s lanes) followed by
; an ordinary mul, with no widening smull.
196define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
197; CHECK-SD-LABEL: dupsext_v2i8_v2i16:
198; CHECK-SD:       // %bb.0: // %entry
199; CHECK-SD-NEXT:    sxtb w8, w0
200; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
201; CHECK-SD-NEXT:    dup v1.2s, w8
202; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
203; CHECK-SD-NEXT:    mul v0.2s, v1.2s, v0.2s
204; CHECK-SD-NEXT:    ret
205;
206; CHECK-GI-LABEL: dupsext_v2i8_v2i16:
207; CHECK-GI:       // %bb.0: // %entry
208; CHECK-GI-NEXT:    lsl w8, w0, #8
209; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #24
210; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
211; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #24
212; CHECK-GI-NEXT:    dup v1.4h, w8
213; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
214; CHECK-GI-NEXT:    mul v0.2s, v1.2s, v0.2s
215; CHECK-GI-NEXT:    ret
216entry:
217  %in = sext i8 %src to i16
218  %ext.b = sext <2 x i8> %b to <2 x i16>
219  %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
220  %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
221  %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
222  ret <2 x i16> %out
223}
224
; Zero-extending splat multiply that widens i16 all the way to i64
; (<2 x i16> -> <2 x i64>). The SelectionDAG assertions show both operands
; being masked to 16 bits at 32-bit lane width and then fed to umull; the
; GlobalISel assertions show masking at 64-bit width followed by a
; lane-by-lane scalar multiply.
225define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
226; CHECK-SD-LABEL: dupzext_v2i16_v2i64:
227; CHECK-SD:       // %bb.0: // %entry
228; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
229; CHECK-SD-NEXT:    and w8, w0, #0xffff
230; CHECK-SD-NEXT:    dup v2.2s, w8
231; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
232; CHECK-SD-NEXT:    umull v0.2d, v2.2s, v0.2s
233; CHECK-SD-NEXT:    ret
234;
235; CHECK-GI-LABEL: dupzext_v2i16_v2i64:
236; CHECK-GI:       // %bb.0: // %entry
237; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
238; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
239; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
240; CHECK-GI-NEXT:    and x8, x0, #0xffff
241; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
242; CHECK-GI-NEXT:    dup v1.2d, x8
243; CHECK-GI-NEXT:    fmov x8, d1
244; CHECK-GI-NEXT:    fmov x9, d0
245; CHECK-GI-NEXT:    mov x10, v1.d[1]
246; CHECK-GI-NEXT:    mov x11, v0.d[1]
247; CHECK-GI-NEXT:    mul x8, x8, x9
248; CHECK-GI-NEXT:    mul x9, x10, x11
249; CHECK-GI-NEXT:    mov v0.d[0], x8
250; CHECK-GI-NEXT:    mov v0.d[1], x9
251; CHECK-GI-NEXT:    ret
252entry:
253  %in = zext i16 %src to i64
254  %ext.b = zext <2 x i16> %b to <2 x i64>
255  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
256  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
257  %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
258  ret <2 x i64> %out
259}
260
261; dupsext_v4i8_v4i16
262; dupsext_v2i8_v2i32
263; dupsext_v4i8_v4i32
264; dupsext_v2i8_v2i64
265; dupsext_v2i16_v2i32
266; dupsext_v2i16_v2i64
267; dupzext_v2i8_v2i16
268; dupzext_v4i8_v4i16
269; dupzext_v2i8_v2i32
270; dupzext_v4i8_v4i32
271; dupzext_v2i8_v2i64
272; dupzext_v2i16_v2i32
273; dupzext_v2i16_v2i64
274
275; Unsupported states
276
; The scalar is inserted into lane 1 (not lane 0) of an undef vector and the
; shuffle mask is a rotation <2,3,4,5,6,7,0,1> rather than zeroinitializer,
; so only result lane 7 holds the inserted value and every other lane is
; undef. The SelectionDAG assertions still expect dup + smull here; the
; GlobalISel assertions show a lane insert followed by ext and a plain mul.
277define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
278; CHECK-SD-LABEL: nonsplat_shuffleinsert:
279; CHECK-SD:       // %bb.0: // %entry
280; CHECK-SD-NEXT:    dup v1.8b, w0
281; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
282; CHECK-SD-NEXT:    ret
283;
284; CHECK-GI-LABEL: nonsplat_shuffleinsert:
285; CHECK-GI:       // %bb.0: // %entry
286; CHECK-GI-NEXT:    lsl w8, w0, #8
287; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
288; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
289; CHECK-GI-NEXT:    mov v1.h[1], w8
290; CHECK-GI-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
291; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
292; CHECK-GI-NEXT:    ret
293entry:
294  %in = sext i8 %src to i16
295  %ext.b = sext <8 x i8> %b to <8 x i16>
296  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
297  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
298  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
299  ret <8 x i16> %out
300}
301
; No splat at all: four different sign-extended i16 scalars are inserted into
; lanes 0..3 of a <4 x i32> vector, which is then multiplied by the
; sign-extended <4 x i16> %b. The SelectionDAG assertions expect the vector to
; be assembled at 16-bit lane width and fed to smull; the GlobalISel
; assertions show each scalar being sign-extended and inserted at 32-bit lane
; width before a plain mul.
302define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
303; CHECK-SD-LABEL: nonsplat_shuffleinsert2:
304; CHECK-SD:       // %bb.0: // %entry
305; CHECK-SD-NEXT:    fmov s1, w0
306; CHECK-SD-NEXT:    mov v1.h[1], w1
307; CHECK-SD-NEXT:    mov v1.h[2], w2
308; CHECK-SD-NEXT:    mov v1.h[3], w3
309; CHECK-SD-NEXT:    smull v0.4s, v1.4h, v0.4h
310; CHECK-SD-NEXT:    ret
311;
312; CHECK-GI-LABEL: nonsplat_shuffleinsert2:
313; CHECK-GI:       // %bb.0: // %entry
314; CHECK-GI-NEXT:    sxth w8, w0
315; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
316; CHECK-GI-NEXT:    mov v1.s[0], w8
317; CHECK-GI-NEXT:    sxth w8, w1
318; CHECK-GI-NEXT:    mov v1.s[1], w8
319; CHECK-GI-NEXT:    sxth w8, w2
320; CHECK-GI-NEXT:    mov v1.s[2], w8
321; CHECK-GI-NEXT:    sxth w8, w3
322; CHECK-GI-NEXT:    mov v1.s[3], w8
323; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
324; CHECK-GI-NEXT:    ret
325entry:
326  %s0 = sext i16 %b0 to i32
327  %s1 = sext i16 %b1 to i32
328  %s2 = sext i16 %b2 to i32
329  %s3 = sext i16 %b3 to i32
330  %ext.b = sext <4 x i16> %b to <4 x i32>
331  %v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
332  %v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
333  %v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
334  %v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
335  %out = mul nsw <4 x i32> %v3, %ext.b
336  ret <4 x i32> %out
337}
338
; The splatted scalar here is a zext of an i1 (the result of
; "icmp slt i64 0, %a") to i16. It is splatted across <16 x i16> and
; multiplied by a negated <8 x i16> load that has been widened to 16 lanes
; with a shufflevector (mask indices 8..15 select from the undef operand).
; The product is compared against zero, and'ed with an all-true <16 x i1>
; mask, sign-extended to <16 x i8> and stored to %p. Neither selector's
; assertions contain a widening smull/umull for this function.
339define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
340; CHECK-SD-LABEL: typei1_orig:
341; CHECK-SD:       // %bb.0:
342; CHECK-SD-NEXT:    cmp x0, #0
343; CHECK-SD-NEXT:    ldr q0, [x2]
344; CHECK-SD-NEXT:    cset w8, gt
345; CHECK-SD-NEXT:    dup v1.8h, w8
346; CHECK-SD-NEXT:    cmtst v0.8h, v0.8h, v0.8h
347; CHECK-SD-NEXT:    cmeq v1.8h, v1.8h, #0
348; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
349; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
350; CHECK-SD-NEXT:    str q0, [x1]
351; CHECK-SD-NEXT:    ret
352;
353; CHECK-GI-LABEL: typei1_orig:
354; CHECK-GI:       // %bb.0:
355; CHECK-GI-NEXT:    ldr q1, [x2]
356; CHECK-GI-NEXT:    cmp x0, #0
357; CHECK-GI-NEXT:    movi v0.2d, #0xffffffffffffffff
358; CHECK-GI-NEXT:    cset w8, gt
359; CHECK-GI-NEXT:    neg v1.8h, v1.8h
360; CHECK-GI-NEXT:    dup v2.8h, w8
361; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
362; CHECK-GI-NEXT:    mul v1.8h, v1.8h, v2.8h
363; CHECK-GI-NEXT:    cmeq v1.8h, v1.8h, #0
364; CHECK-GI-NEXT:    mvn v1.16b, v1.16b
365; CHECK-GI-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
366; CHECK-GI-NEXT:    shl v0.16b, v0.16b, #7
367; CHECK-GI-NEXT:    sshr v0.16b, v0.16b, #7
368; CHECK-GI-NEXT:    str q0, [x1]
369; CHECK-GI-NEXT:    ret
370  %tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
371  %tmp6 = load <8 x i16>, ptr %q, align 2
372  %tmp7 = sub <8 x i16> zeroinitializer, %tmp6
373  %tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
374  %tmp9 = icmp slt i64 0, %a
375  %tmp10 = zext i1 %tmp9 to i16
376  %tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
377  %tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
378  %tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
379  %tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
380  %tmp15 = and <16 x i1> %tmp14, %tmp
381  %tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
382  store <16 x i8> %tmp16, ptr %p, align 1
383  ret void
384}
385
; Splat multiply where both sources are i1-typed: an i1 scalar and an
; <8 x i1> vector are zero-extended to i16 lanes. The SelectionDAG assertions
; expect both operands to be masked to bit 0 at byte width and then fed to
; umull; the GlobalISel assertions show ushll + mask + dup + mul.
386define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
387; CHECK-SD-LABEL: typei1_v8i1_v8i16:
388; CHECK-SD:       // %bb.0: // %entry
389; CHECK-SD-NEXT:    movi v1.8b, #1
390; CHECK-SD-NEXT:    and w8, w0, #0x1
391; CHECK-SD-NEXT:    dup v2.8b, w8
392; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
393; CHECK-SD-NEXT:    umull v0.8h, v2.8b, v0.8b
394; CHECK-SD-NEXT:    ret
395;
396; CHECK-GI-LABEL: typei1_v8i1_v8i16:
397; CHECK-GI:       // %bb.0: // %entry
398; CHECK-GI-NEXT:    movi v1.8h, #1
399; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
400; CHECK-GI-NEXT:    and w8, w0, #0x1
401; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
402; CHECK-GI-NEXT:    dup v1.8h, w8
403; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
404; CHECK-GI-NEXT:    ret
405entry:
406  %in = zext i1 %src to i16
407  %ext.b = zext <8 x i1> %b to <8 x i16>
408  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
409  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
410  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
411  ret <8 x i16> %out
412}
413
; There is no insertelement: the shuffle (rotation mask <2,3,4,5,6,7,0,1>)
; is applied directly to the sign-extended input vector, and the result is
; multiplied by that same extended vector. The SelectionDAG assertions expect
; the rotation to be done on the narrow vector (ext by 2 bytes) and fed to
; smull; the GlobalISel assertions show sshll first, then ext by 4 bytes and
; a plain mul.
414define <8 x i16> @missing_insert(<8 x i8> %b) {
415; CHECK-SD-LABEL: missing_insert:
416; CHECK-SD:       // %bb.0: // %entry
417; CHECK-SD-NEXT:    ext v1.8b, v0.8b, v0.8b, #2
418; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
419; CHECK-SD-NEXT:    ret
420;
421; CHECK-GI-LABEL: missing_insert:
422; CHECK-GI:       // %bb.0: // %entry
423; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
424; CHECK-GI-NEXT:    ext v1.16b, v0.16b, v0.16b, #4
425; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
426; CHECK-GI-NEXT:    ret
427entry:
428  %ext.b = sext <8 x i8> %b to <8 x i16>
429  %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
430  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
431  ret <8 x i16> %out
432}
433
; Lane-reversing shuffle (mask <7,6,5,4,3,2,1,0>) of a sign-extended
; <8 x i8> vector, multiplied by a second sign-extended vector. The
; SelectionDAG assertions expect the reversal to be done on the narrow vector
; (rev64 on .8b) and fed to smull; the GlobalISel assertions show both inputs
; being extended first, then rev64 + ext on .8h lanes and a plain mul.
434define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
435; CHECK-SD-LABEL: shufsext_v8i8_v8i16:
436; CHECK-SD:       // %bb.0: // %entry
437; CHECK-SD-NEXT:    rev64 v0.8b, v0.8b
438; CHECK-SD-NEXT:    smull v0.8h, v0.8b, v1.8b
439; CHECK-SD-NEXT:    ret
440;
441; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
442; CHECK-GI:       // %bb.0: // %entry
443; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
444; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
445; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
446; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
447; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
448; CHECK-GI-NEXT:    ret
449entry:
450  %in = sext <8 x i8> %src to <8 x i16>
451  %ext.b = sext <8 x i8> %b to <8 x i16>
452  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
453  %out = mul nsw <8 x i16> %shuf, %ext.b
454  ret <8 x i16> %out
455}
456
; Lane swap (mask <1,0>) of a sign-extended <2 x i32> vector, multiplied by a
; second sign-extended vector, producing <2 x i64>. The SelectionDAG
; assertions expect rev64 on the narrow .2s vector feeding smull; the
; GlobalISel assertions show both inputs being extended first, an ext by
; 8 bytes, and a lane-by-lane scalar multiply.
457define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
458; CHECK-SD-LABEL: shufsext_v2i32_v2i64:
459; CHECK-SD:       // %bb.0: // %entry
460; CHECK-SD-NEXT:    rev64 v0.2s, v0.2s
461; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
462; CHECK-SD-NEXT:    ret
463;
464; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
465; CHECK-GI:       // %bb.0: // %entry
466; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
467; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
468; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
469; CHECK-GI-NEXT:    fmov x9, d1
470; CHECK-GI-NEXT:    mov x11, v1.d[1]
471; CHECK-GI-NEXT:    fmov x8, d0
472; CHECK-GI-NEXT:    mov x10, v0.d[1]
473; CHECK-GI-NEXT:    mul x8, x8, x9
474; CHECK-GI-NEXT:    mul x9, x10, x11
475; CHECK-GI-NEXT:    mov v0.d[0], x8
476; CHECK-GI-NEXT:    mov v0.d[1], x9
477; CHECK-GI-NEXT:    ret
478entry:
479  %in = sext <2 x i32> %src to <2 x i64>
480  %ext.b = sext <2 x i32> %b to <2 x i64>
481  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
482  %out = mul nsw <2 x i64> %shuf, %ext.b
483  ret <2 x i64> %out
484}
485
; Zero-extending variant of shufsext_v8i8_v8i16: a lane-reversing shuffle of
; a zext'ed <8 x i8> vector multiplied by a second zext'ed vector. The
; SelectionDAG assertions expect rev64 on .8b feeding umull; the GlobalISel
; assertions show ushll on both inputs, then rev64 + ext on .8h lanes and a
; plain mul.
486define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
487; CHECK-SD-LABEL: shufzext_v8i8_v8i16:
488; CHECK-SD:       // %bb.0: // %entry
489; CHECK-SD-NEXT:    rev64 v0.8b, v0.8b
490; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
491; CHECK-SD-NEXT:    ret
492;
493; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
494; CHECK-GI:       // %bb.0: // %entry
495; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
496; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
497; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
498; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
499; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
500; CHECK-GI-NEXT:    ret
501entry:
502  %in = zext <8 x i8> %src to <8 x i16>
503  %ext.b = zext <8 x i8> %b to <8 x i16>
504  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
505  %out = mul nsw <8 x i16> %shuf, %ext.b
506  ret <8 x i16> %out
507}
508
; Zero-extending counterpart of shufsext_v2i32_v2i64: a lane swap
; (mask <1,0>) of a zext'ed <2 x i32> vector multiplied by a second zext'ed
; vector, producing <2 x i64>. The body previously used sext, which made this
; test a duplicate of the signed one and left the unsigned case uncovered.
; Expected output mirrors the signed test with the unsigned instructions
; (umull for SelectionDAG, ushll for GlobalISel).
; NOTE(review): the assertions below were adjusted by hand to match the zext
; IR; re-run utils/update_llc_test_checks.py to confirm them.
509define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
510; CHECK-SD-LABEL: shufzext_v2i32_v2i64:
511; CHECK-SD:       // %bb.0: // %entry
512; CHECK-SD-NEXT:    rev64 v0.2s, v0.2s
513; CHECK-SD-NEXT:    umull v0.2d, v0.2s, v1.2s
514; CHECK-SD-NEXT:    ret
515;
516; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
517; CHECK-GI:       // %bb.0: // %entry
518; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
519; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
520; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
521; CHECK-GI-NEXT:    fmov x9, d1
522; CHECK-GI-NEXT:    mov x11, v1.d[1]
523; CHECK-GI-NEXT:    fmov x8, d0
524; CHECK-GI-NEXT:    mov x10, v0.d[1]
525; CHECK-GI-NEXT:    mul x8, x8, x9
526; CHECK-GI-NEXT:    mul x9, x10, x11
527; CHECK-GI-NEXT:    mov v0.d[0], x8
528; CHECK-GI-NEXT:    mov v0.d[1], x9
529; CHECK-GI-NEXT:    ret
530entry:
531  %in = zext <2 x i32> %src to <2 x i64>
532  %ext.b = zext <2 x i32> %b to <2 x i64>
533  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
534  %out = mul nsw <2 x i64> %shuf, %ext.b
535  ret <2 x i64> %out
536}
537
; Two-input shuffle: lanes are interleaved from two different zext'ed
; <8 x i8> sources with mask <0,8,2,10,4,12,6,14>, then multiplied by a third
; zext'ed vector. The SelectionDAG assertions expect trn1 on the narrow .8b
; inputs feeding umull; the GlobalISel assertions show all three inputs being
; extended with ushll, trn1 on .8h lanes and a plain mul.
538define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
539; CHECK-SD-LABEL: shufzext_v8i8_v8i16_twoin:
540; CHECK-SD:       // %bb.0: // %entry
541; CHECK-SD-NEXT:    trn1 v0.8b, v0.8b, v1.8b
542; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v2.8b
543; CHECK-SD-NEXT:    ret
544;
545; CHECK-GI-LABEL: shufzext_v8i8_v8i16_twoin:
546; CHECK-GI:       // %bb.0: // %entry
547; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
548; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
549; CHECK-GI-NEXT:    trn1 v0.8h, v0.8h, v1.8h
550; CHECK-GI-NEXT:    ushll v1.8h, v2.8b, #0
551; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
552; CHECK-GI-NEXT:    ret
553entry:
554  %in1 = zext <8 x i8> %src1 to <8 x i16>
555  %in2 = zext <8 x i8> %src2 to <8 x i16>
556  %ext.b = zext <8 x i8> %b to <8 x i16>
557  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
558  %out = mul nsw <8 x i16> %shuf, %ext.b
559  ret <8 x i16> %out
560}
561
; Two-input shuffle with mixed extension kinds: %src1 is zero-extended while
; %src2 is sign-extended before the lanes are interleaved with mask
; <0,8,2,10,4,12,6,14>. Both selectors share one set of assertions here:
; each input is extended separately (ushll / sshll), then trn1 on .8h lanes
; and a plain mul, with no widening multiply.
562define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
563; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
564; CHECK:       // %bb.0: // %entry
565; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
566; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
567; CHECK-NEXT:    trn1 v0.8h, v0.8h, v1.8h
568; CHECK-NEXT:    ushll v1.8h, v2.8b, #0
569; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
570; CHECK-NEXT:    ret
571entry:
572  %in1 = zext <8 x i8> %src1 to <8 x i16>
573  %in2 = sext <8 x i8> %src2 to <8 x i16>
574  %ext.b = zext <8 x i8> %b to <8 x i16>
575  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
576  %out = mul nsw <8 x i16> %shuf, %ext.b
577  ret <8 x i16> %out
578}
579
580