xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-zip.ll (revision f6947e479e14e7904aa0b2539a95f5dfdc8f9295)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; CHECK-GI:       warning: Instruction selection used fallback path for shuffle_zip1
6; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shuffle_zip2
7; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shuffle_zip3
8
9define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind { ; 64-bit <8 x i8>: both interleave shuffles of two loaded vectors, summed
10; CHECK-LABEL: vzipi8:
11; CHECK:       // %bb.0:
12; CHECK-NEXT:    ldr d0, [x0]
13; CHECK-NEXT:    ldr d1, [x1]
14; CHECK-NEXT:    zip1.8b v2, v0, v1
15; CHECK-NEXT:    zip2.8b v0, v0, v1
16; CHECK-NEXT:    add.8b v0, v2, v0
17; CHECK-NEXT:    ret
18  %tmp1 = load <8 x i8>, ptr %A
19  %tmp2 = load <8 x i8>, ptr %B
20  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleaves the low halves of %tmp1 and %tmp2 (zip1 mask)
21  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleaves the high halves (zip2 mask)
22  %tmp5 = add <8 x i8> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
23  ret <8 x i8> %tmp5
24}
25
26define <4 x i16> @vzipi16(ptr %A, ptr %B) nounwind { ; 64-bit <4 x i16>: both interleave shuffles of two loaded vectors, summed
27; CHECK-LABEL: vzipi16:
28; CHECK:       // %bb.0:
29; CHECK-NEXT:    ldr d0, [x0]
30; CHECK-NEXT:    ldr d1, [x1]
31; CHECK-NEXT:    zip1.4h v2, v0, v1
32; CHECK-NEXT:    zip2.4h v0, v0, v1
33; CHECK-NEXT:    add.4h v0, v2, v0
34; CHECK-NEXT:    ret
35  %tmp1 = load <4 x i16>, ptr %A
36  %tmp2 = load <4 x i16>, ptr %B
37  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleaves the low halves (zip1 mask)
38  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleaves the high halves (zip2 mask)
39  %tmp5 = add <4 x i16> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
40  ret <4 x i16> %tmp5
41}
42
43define <16 x i8> @vzipQi8(ptr %A, ptr %B) nounwind { ; 128-bit <16 x i8>: both interleave shuffles of two loaded vectors, summed
44; CHECK-LABEL: vzipQi8:
45; CHECK:       // %bb.0:
46; CHECK-NEXT:    ldr q0, [x0]
47; CHECK-NEXT:    ldr q1, [x1]
48; CHECK-NEXT:    zip1.16b v2, v0, v1
49; CHECK-NEXT:    zip2.16b v0, v0, v1
50; CHECK-NEXT:    add.16b v0, v2, v0
51; CHECK-NEXT:    ret
52  %tmp1 = load <16 x i8>, ptr %A
53  %tmp2 = load <16 x i8>, ptr %B
54  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; interleaves the low halves (zip1 mask)
55  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> ; interleaves the high halves (zip2 mask)
56  %tmp5 = add <16 x i8> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
57  ret <16 x i8> %tmp5
58}
59
60define <8 x i16> @vzipQi16(ptr %A, ptr %B) nounwind { ; 128-bit <8 x i16>: both interleave shuffles of two loaded vectors, summed
61; CHECK-LABEL: vzipQi16:
62; CHECK:       // %bb.0:
63; CHECK-NEXT:    ldr q0, [x0]
64; CHECK-NEXT:    ldr q1, [x1]
65; CHECK-NEXT:    zip1.8h v2, v0, v1
66; CHECK-NEXT:    zip2.8h v0, v0, v1
67; CHECK-NEXT:    add.8h v0, v2, v0
68; CHECK-NEXT:    ret
69  %tmp1 = load <8 x i16>, ptr %A
70  %tmp2 = load <8 x i16>, ptr %B
71  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleaves the low halves (zip1 mask)
72  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleaves the high halves (zip2 mask)
73  %tmp5 = add <8 x i16> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
74  ret <8 x i16> %tmp5
75}
76
77define <4 x i32> @vzipQi32(ptr %A, ptr %B) nounwind { ; 128-bit <4 x i32>: both interleave shuffles of two loaded vectors, summed
78; CHECK-LABEL: vzipQi32:
79; CHECK:       // %bb.0:
80; CHECK-NEXT:    ldr q0, [x0]
81; CHECK-NEXT:    ldr q1, [x1]
82; CHECK-NEXT:    zip1.4s v2, v0, v1
83; CHECK-NEXT:    zip2.4s v0, v0, v1
84; CHECK-NEXT:    add.4s v0, v2, v0
85; CHECK-NEXT:    ret
86  %tmp1 = load <4 x i32>, ptr %A
87  %tmp2 = load <4 x i32>, ptr %B
88  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleaves the low halves (zip1 mask)
89  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleaves the high halves (zip2 mask)
90  %tmp5 = add <4 x i32> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
91  ret <4 x i32> %tmp5
92}
93
94define <4 x float> @vzipQf(ptr %A, ptr %B) nounwind { ; 128-bit <4 x float>: same shuffles as the integer variants, combined with fadd
95; CHECK-LABEL: vzipQf:
96; CHECK:       // %bb.0:
97; CHECK-NEXT:    ldr q0, [x0]
98; CHECK-NEXT:    ldr q1, [x1]
99; CHECK-NEXT:    zip1.4s v2, v0, v1
100; CHECK-NEXT:    zip2.4s v0, v0, v1
101; CHECK-NEXT:    fadd.4s v0, v2, v0
102; CHECK-NEXT:    ret
103  %tmp1 = load <4 x float>, ptr %A
104  %tmp2 = load <4 x float>, ptr %B
105  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleaves the low halves (zip1 mask)
106  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleaves the high halves (zip2 mask)
107  %tmp5 = fadd <4 x float> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
108  ret <4 x float> %tmp5
109}
110
111; Undef shuffle indices should not prevent matching to ZIP1/ZIP2:
112
113define <8 x i8> @vzipi8_undef(ptr %A, ptr %B) nounwind { ; same as vzipi8 but with some mask lanes undef; zip1/zip2 are still expected
114; CHECK-LABEL: vzipi8_undef:
115; CHECK:       // %bb.0:
116; CHECK-NEXT:    ldr d0, [x0]
117; CHECK-NEXT:    ldr d1, [x1]
118; CHECK-NEXT:    zip1.8b v2, v0, v1
119; CHECK-NEXT:    zip2.8b v0, v0, v1
120; CHECK-NEXT:    add.8b v0, v2, v0
121; CHECK-NEXT:    ret
122  %tmp1 = load <8 x i8>, ptr %A
123  %tmp2 = load <8 x i8>, ptr %B
124  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> ; zip1 mask with lanes 1 and 4 undef
125  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> ; zip2 mask with lanes 5 and 6 undef
126  %tmp5 = add <8 x i8> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
127  ret <8 x i8> %tmp5
128}
129
130define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind { ; same as vzipQi8 but with some mask lanes undef; zip1/zip2 are still expected
131; CHECK-LABEL: vzipQi8_undef:
132; CHECK:       // %bb.0:
133; CHECK-NEXT:    ldr q0, [x0]
134; CHECK-NEXT:    ldr q1, [x1]
135; CHECK-NEXT:    zip1.16b v2, v0, v1
136; CHECK-NEXT:    zip2.16b v0, v0, v1
137; CHECK-NEXT:    add.16b v0, v2, v0
138; CHECK-NEXT:    ret
139  %tmp1 = load <16 x i8>, ptr %A
140  %tmp2 = load <16 x i8>, ptr %B
141  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; zip1 mask with lanes 3, 4 and 5 undef
142  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> ; zip2 mask with lanes 3, 11 and 14 undef
143  %tmp5 = add <16 x i8> %tmp3, %tmp4 ; uses both shuffle results so neither is dead
144  ret <16 x i8> %tmp5
145}
146
147define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip1 mask whose lanes 0 and 1 are undef must still select zip1
148; CHECK-LABEL: vzip1_undef_01:
149; CHECK:       // %bb.0:
150; CHECK-NEXT:    zip1.8h v0, v0, v1
151; CHECK-NEXT:    ret
152  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; a full zip1 mask would start with 0, 8
153  ret <8 x i16> %s
154}
155
156define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip1 mask whose lane 0 is undef must still select zip1
157; CHECK-LABEL: vzip1_undef_0:
158; CHECK:       // %bb.0:
159; CHECK-NEXT:    zip1.8h v0, v0, v1
160; CHECK-NEXT:    ret
161  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; a full zip1 mask would start with 0
162  ret <8 x i16> %s
163}
164
165define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip1 mask whose lane 1 is undef must still select zip1
166; CHECK-LABEL: vzip1_undef_1:
167; CHECK:       // %bb.0:
168; CHECK-NEXT:    zip1.8h v0, v0, v1
169; CHECK-NEXT:    ret
170  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; a full zip1 mask would have 8 in lane 1
171  ret <8 x i16> %s
172}
173
174define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip1 mask whose lanes 0, 1 and 2 are undef must still select zip1
175; CHECK-LABEL: vzip1_undef_012:
176; CHECK:       // %bb.0:
177; CHECK-NEXT:    zip1.8h v0, v0, v1
178; CHECK-NEXT:    ret
179  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11> ; a full zip1 mask would start with 0, 8, 1
180  ret <8 x i16> %s
181}
182
183define <8 x i16> @vzip2_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip2 mask whose lanes 0 and 1 are undef must still select zip2
184; CHECK-LABEL: vzip2_undef_01:
185; CHECK:       // %bb.0:
186; CHECK-NEXT:    zip2.8h v0, v0, v1
187; CHECK-NEXT:    ret
188  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; a full zip2 mask would start with 4, 12
189  ret <8 x i16> %s
190}
191
192define <8 x i16> @vzip2_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip2 mask whose lane 0 is undef must still select zip2
193; CHECK-LABEL: vzip2_undef_0:
194; CHECK:       // %bb.0:
195; CHECK-NEXT:    zip2.8h v0, v0, v1
196; CHECK-NEXT:    ret
197  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; a full zip2 mask would start with 4
198  ret <8 x i16> %s
199}
200
201define <8 x i16> @vzip2_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip2 mask whose lane 1 is undef must still select zip2
202; CHECK-LABEL: vzip2_undef_1:
203; CHECK:       // %bb.0:
204; CHECK-NEXT:    zip2.8h v0, v0, v1
205; CHECK-NEXT:    ret
206  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 4, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; a full zip2 mask would have 12 in lane 1
207  ret <8 x i16> %s
208}
209
210define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { ; zip2 mask whose lanes 0, 1 and 2 are undef must still select zip2
211; CHECK-LABEL: vzip2_undef_012:
212; CHECK:       // %bb.0:
213; CHECK-NEXT:    zip2.8h v0, v0, v1
214; CHECK-NEXT:    ret
215  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 6, i32 14, i32 7, i32 15> ; a full zip2 mask would start with 4, 12, 5
216  ret <8 x i16> %s
217}
218
219define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) { ; widening shuffle: two 64-bit inputs fully interleaved into one 128-bit result
220; CHECK-LABEL: combine_v16i8:
221; CHECK:       // %bb.0:
222; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
223; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
224; CHECK-NEXT:    zip1.16b v0, v0, v1
225; CHECK-NEXT:    ret
226  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; alternates all 8 lanes of %0 with all 8 lanes of %1
227  ret <16 x i8> %3
228}
229
230define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) { ; same result as combine_v16i8, written as 64-bit zip1 + zip2 followed by a concatenation; the two selectors are expected to differ
231; CHECK-SD-LABEL: combine2_v16i8:
232; CHECK-SD:       // %bb.0:
233; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
234; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
235; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
236; CHECK-SD-NEXT:    ret
237;
238; CHECK-GI-LABEL: combine2_v16i8:
239; CHECK-GI:       // %bb.0:
240; CHECK-GI-NEXT:    zip1.8b v2, v0, v1
241; CHECK-GI-NEXT:    zip2.8b v0, v0, v1
242; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
243; CHECK-GI-NEXT:    mov.16b v0, v2
244; CHECK-GI-NEXT:    ret
245  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleaves the low halves (zip1 mask)
246  %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleaves the high halves (zip2 mask)
247  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask over both inputs: concatenates %3 and %4
248  ret <16 x i8> %5
249}
250
251define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) { ; widening shuffle: two 64-bit inputs fully interleaved into one 128-bit result
252; CHECK-LABEL: combine_v8i16:
253; CHECK:       // %bb.0:
254; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
255; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
256; CHECK-NEXT:    zip1.8h v0, v0, v1
257; CHECK-NEXT:    ret
258  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; alternates all 4 lanes of %0 with all 4 lanes of %1
259  ret <8 x i16> %3
260}
261
262define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) { ; same result as combine_v8i16, written as 64-bit zip1 + zip2 followed by a concatenation; the two selectors are expected to differ
263; CHECK-SD-LABEL: combine2_v8i16:
264; CHECK-SD:       // %bb.0:
265; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
266; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
267; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
268; CHECK-SD-NEXT:    ret
269;
270; CHECK-GI-LABEL: combine2_v8i16:
271; CHECK-GI:       // %bb.0:
272; CHECK-GI-NEXT:    zip1.4h v2, v0, v1
273; CHECK-GI-NEXT:    zip2.4h v0, v0, v1
274; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
275; CHECK-GI-NEXT:    mov.16b v0, v2
276; CHECK-GI-NEXT:    ret
277  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleaves the low halves (zip1 mask)
278  %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleaves the high halves (zip2 mask)
279  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask over both inputs: concatenates %3 and %4
280  ret <8 x i16> %5
281}
282
283define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) { ; widening shuffle: two 64-bit inputs fully interleaved into one 128-bit result
284; CHECK-LABEL: combine_v4i32:
285; CHECK:       // %bb.0:
286; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
287; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
288; CHECK-NEXT:    zip1.4s v0, v0, v1
289; CHECK-NEXT:    ret
290  %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; alternates both lanes of %0 with both lanes of %1
291  ret <4 x i32> %3
292}
293
294define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) { ; same result as combine_v4i32, written as 64-bit zip1 + zip2 followed by a concatenation; the two selectors are expected to differ
295; CHECK-SD-LABEL: combine2_v4i32:
296; CHECK-SD:       // %bb.0:
297; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
298; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
299; CHECK-SD-NEXT:    zip1.4s v0, v0, v1
300; CHECK-SD-NEXT:    ret
301;
302; CHECK-GI-LABEL: combine2_v4i32:
303; CHECK-GI:       // %bb.0:
304; CHECK-GI-NEXT:    zip1.2s v2, v0, v1
305; CHECK-GI-NEXT:    zip2.2s v0, v0, v1
306; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
307; CHECK-GI-NEXT:    mov.16b v0, v2
308; CHECK-GI-NEXT:    ret
309  %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2> ; lane 0 of each input (zip1 mask)
310  %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3> ; lane 1 of each input (zip2 mask)
311  %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask over both inputs: concatenates %3 and %4
312  ret <4 x i32> %5
313}
314
315define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) { ; combine_v16i8 with one undef mask lane; zip1 is still expected
316; CHECK-LABEL: combine_v16i8_undef:
317; CHECK:       // %bb.0:
318; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
319; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
320; CHECK-NEXT:    zip1.16b v0, v0, v1
321; CHECK-NEXT:    ret
322  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; full interleave mask with lane 1 undef
323  ret <16 x i8> %3
324}
325
326define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) { ; combine2_v16i8 with one undef lane in the first shuffle; the two selectors are expected to differ
327; CHECK-SD-LABEL: combine2_v16i8_undef:
328; CHECK-SD:       // %bb.0:
329; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
330; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
331; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
332; CHECK-SD-NEXT:    ret
333;
334; CHECK-GI-LABEL: combine2_v16i8_undef:
335; CHECK-GI:       // %bb.0:
336; CHECK-GI-NEXT:    zip1.8b v2, v0, v1
337; CHECK-GI-NEXT:    zip2.8b v0, v0, v1
338; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
339; CHECK-GI-NEXT:    mov.16b v0, v2
340; CHECK-GI-NEXT:    ret
341  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; zip1 mask with lane 1 undef
342  %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleaves the high halves (zip2 mask)
343  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask over both inputs: concatenates %3 and %4
344  ret <16 x i8> %5
345}
346
347define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) { ; combine_v8i16 with one undef mask lane; zip1 is still expected
348; CHECK-LABEL: combine_v8i16_undef:
349; CHECK:       // %bb.0:
350; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
351; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
352; CHECK-NEXT:    zip1.8h v0, v0, v1
353; CHECK-NEXT:    ret
354  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; full interleave mask with lane 1 undef
355  ret <8 x i16> %3
356}
357
358; FIXME: This could be zip1 too (with the operands swapped) once the 8,0,9,1... pattern is handled; it currently lowers to tbl.
359define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { ; interleave expressed with swapped operands; both selectors are expected to emit a table lookup. NOTE(review): the name says v8i16 but the types are <8 x i8> -> <16 x i8>
360; CHECK-SD-LABEL: combine_v8i16_8first:
361; CHECK-SD:       // %bb.0:
362; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
363; CHECK-SD-NEXT:    adrp x8, .LCPI25_0
364; CHECK-SD-NEXT:    fmov d2, d0
365; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI25_0]
366; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
367; CHECK-SD-NEXT:    ret
368;
369; CHECK-GI-LABEL: combine_v8i16_8first:
370; CHECK-GI:       // %bb.0:
371; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
372; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
373; CHECK-GI-NEXT:    fmov d31, d1
374; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI25_0]
375; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
376; CHECK-GI-NEXT:    ret
377  %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7> ; operands are (%1, %0), so index 8 is %0[0] and index 0 is %1[0]: the result is %0[0], %1[0], %0[1], %1[1], ...
378  ret <16 x i8> %3
379}
380
381
382; FIXME: This could be zip1 too (with the operands swapped) once the 8,0,9,1... pattern is handled; it currently lowers to tbl.
383define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { ; combine_v8i16_8first with the last mask lane undef; both selectors are expected to emit a table lookup. NOTE(review): the name says v8i16 but the types are <8 x i8> -> <16 x i8>
384; CHECK-SD-LABEL: combine_v8i16_8firstundef:
385; CHECK-SD:       // %bb.0:
386; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
387; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
388; CHECK-SD-NEXT:    fmov d2, d0
389; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
390; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
391; CHECK-SD-NEXT:    ret
392;
393; CHECK-GI-LABEL: combine_v8i16_8firstundef:
394; CHECK-GI:       // %bb.0:
395; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
396; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
397; CHECK-GI-NEXT:    fmov d31, d1
398; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
399; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
400; CHECK-GI-NEXT:    ret
401  %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef> ; operands are (%1, %0); same swapped interleave mask with lane 15 undef
402  ret <16 x i8> %3
403}
404
405define <4 x float> @shuffle_zip1(<4 x float> %arg) { ; i1-vector shuffles of a float compare feeding a select; the warning assertions at the top of the file expect GlobalISel to fall back for this function
406; CHECK-LABEL: shuffle_zip1:
407; CHECK:       // %bb.0: // %bb
408; CHECK-NEXT:    movi.2d v1, #0000000000000000
409; CHECK-NEXT:    fcmgt.4s v0, v0, v1
410; CHECK-NEXT:    uzp1.8h v1, v0, v0
411; CHECK-NEXT:    xtn.4h v0, v0
412; CHECK-NEXT:    xtn.4h v1, v1
413; CHECK-NEXT:    zip2.4h v0, v0, v1
414; CHECK-NEXT:    fmov.4s v1, #1.00000000
415; CHECK-NEXT:    zip1.4h v0, v0, v0
416; CHECK-NEXT:    sshll.4s v0, v0, #0
417; CHECK-NEXT:    and.16b v0, v1, v0
418; CHECK-NEXT:    ret
419bb:
420  %inst = fcmp olt <4 x float> zeroinitializer, %arg ; per-lane ordered 0.0 < %arg, giving <4 x i1>
421  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0> ; keeps lanes 2 and 0 of the compare result, in that order
422  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1> ; duplicates each of the two lanes: <a, a, b, b>
423  %inst3 = select <4 x i1> %inst2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer ; 1.0 where the mask is set, 0.0 elsewhere
424  ret <4 x float> %inst3
425}
426
427define <4 x i32> @shuffle_zip2(<4 x i32> %arg) { ; integer (unsigned compare) variant of shuffle_zip1; the warning assertions at the top of the file expect GlobalISel to fall back for this function
428; CHECK-LABEL: shuffle_zip2:
429; CHECK:       // %bb.0: // %bb
430; CHECK-NEXT:    cmtst.4s v0, v0, v0
431; CHECK-NEXT:    uzp1.8h v1, v0, v0
432; CHECK-NEXT:    xtn.4h v0, v0
433; CHECK-NEXT:    xtn.4h v1, v1
434; CHECK-NEXT:    zip2.4h v0, v0, v1
435; CHECK-NEXT:    movi.4s v1, #1
436; CHECK-NEXT:    zip1.4h v0, v0, v0
437; CHECK-NEXT:    ushll.4s v0, v0, #0
438; CHECK-NEXT:    and.16b v0, v0, v1
439; CHECK-NEXT:    ret
440bb:
441  %inst = icmp ult <4 x i32> zeroinitializer, %arg ; per-lane unsigned 0 < %arg, giving <4 x i1>
442  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0> ; keeps lanes 2 and 0 of the compare result, in that order
443  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1> ; duplicates each of the two lanes: <a, a, b, b>
444  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer ; 1 where the mask is set, 0 elsewhere
445  ret <4 x i32> %inst3
446}
447
448define <4 x i32> @shuffle_zip3(<4 x i32> %arg) { ; integer (signed compare) variant of shuffle_zip1; the warning assertions at the top of the file expect GlobalISel to fall back for this function
449; CHECK-LABEL: shuffle_zip3:
450; CHECK:       // %bb.0: // %bb
451; CHECK-NEXT:    cmgt.4s v0, v0, #0
452; CHECK-NEXT:    uzp1.8h v1, v0, v0
453; CHECK-NEXT:    xtn.4h v0, v0
454; CHECK-NEXT:    xtn.4h v1, v1
455; CHECK-NEXT:    zip2.4h v0, v0, v1
456; CHECK-NEXT:    movi.4s v1, #1
457; CHECK-NEXT:    zip1.4h v0, v0, v0
458; CHECK-NEXT:    sshll.4s v0, v0, #0
459; CHECK-NEXT:    and.16b v0, v0, v1
460; CHECK-NEXT:    ret
461bb:
462  %inst = icmp slt <4 x i32> zeroinitializer, %arg ; per-lane signed 0 < %arg, giving <4 x i1>
463  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0> ; keeps lanes 2 and 0 of the compare result, in that order
464  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1> ; duplicates each of the two lanes: <a, a, b, b>
465  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer ; 1 where the mask is set, 0 elsewhere
466  ret <4 x i32> %inst3
467}
468