xref: /llvm-project/llvm/test/CodeGen/AArch64/shufflevector.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; ===== Legal Vector Types =====
6
; Irregular two-input byte shuffle: odd lanes of %a followed by lanes 0, 2, 4
; and 7 of %b. Both runs expect the inputs to be merged into one 128-bit
; register and permuted by TBL with an index vector loaded from the constant
; pool; the SD run uses the 8-byte TBL form, the GI run the 16-byte form.
7define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
8; CHECK-SD-LABEL: shufflevector_v8i8:
9; CHECK-SD:       // %bb.0:
10; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
11; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
12; CHECK-SD-NEXT:    adrp x8, .LCPI0_0
13; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
14; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
15; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
16; CHECK-SD-NEXT:    ret
17;
18; CHECK-GI-LABEL: shufflevector_v8i8:
19; CHECK-GI:       // %bb.0:
20; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
21; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
22; CHECK-GI-NEXT:    adrp x8, .LCPI0_0
23; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
24; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
25; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
26; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
27; CHECK-GI-NEXT:    ret
28    %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
29    ret <8 x i8> %c
30}
31
; Irregular 16-byte mask: the first twelve result lanes come from %a and the
; last four from %b (lanes 9, 14, 15, 15). Both runs expect a two-register TBL
; with the index vector loaded from the constant pool; the SD and GI sequences
; differ only in the order of the register-kill annotations.
32define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
33; CHECK-SD-LABEL: shufflevector_v16i8:
34; CHECK-SD:       // %bb.0:
35; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
36; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
37; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
38; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
39; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
40; CHECK-SD-NEXT:    ret
41;
42; CHECK-GI-LABEL: shufflevector_v16i8:
43; CHECK-GI:       // %bb.0:
44; CHECK-GI-NEXT:    adrp x8, .LCPI1_0
45; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
46; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
47; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
48; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
49; CHECK-GI-NEXT:    ret
50    %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
51    ret <16 x i8> %c
52}
53
; Mask <1, 3, 5, 7> picks the odd-numbered lanes of the concatenated inputs.
; Both runs share one prefix and expect a single UZP2 on 4 x 16-bit lanes.
54define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
55; CHECK-LABEL: shufflevector_v4i16:
56; CHECK:       // %bb.0:
57; CHECK-NEXT:    uzp2 v0.4h, v0.4h, v1.4h
58; CHECK-NEXT:    ret
59    %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
60    ret <4 x i16> %c
61}
62
; Irregular mask on 8 x 16-bit lanes: odd lanes of %a followed by lanes 0, 2, 4
; and 7 of %b. Both runs expect a two-register byte TBL with the index vector
; loaded from the constant pool; the SD and GI sequences differ only in the
; order of the register-kill annotations.
63define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
64; CHECK-SD-LABEL: shufflevector_v8i16:
65; CHECK-SD:       // %bb.0:
66; CHECK-SD-NEXT:    adrp x8, .LCPI3_0
67; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
68; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
69; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
70; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
71; CHECK-SD-NEXT:    ret
72;
73; CHECK-GI-LABEL: shufflevector_v8i16:
74; CHECK-GI:       // %bb.0:
75; CHECK-GI-NEXT:    adrp x8, .LCPI3_0
76; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
77; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
78; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
79; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
80; CHECK-GI-NEXT:    ret
81    %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
82    ret <8 x i16> %c
83}
84
; Mask <1, 3> takes lane 1 of %a and lane 1 of %b. Both runs share one prefix
; and expect a single ZIP2 on 2 x 32-bit lanes.
85define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) {
86; CHECK-LABEL: shufflevector_v2i32:
87; CHECK:       // %bb.0:
88; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
89; CHECK-NEXT:    ret
90    %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
91    ret <2 x i32> %c
92}
93
; Mask <1, 3, 5, 7> picks the odd-numbered lanes of the concatenated inputs.
; Both runs share one prefix and expect a single UZP2 on 4 x 32-bit lanes.
94define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
95; CHECK-LABEL: shufflevector_v4i32:
96; CHECK:       // %bb.0:
97; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
98; CHECK-NEXT:    ret
99    %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
100    ret <4 x i32> %c
101}
102
; Mask <1, 3> takes lane 1 of %a and lane 1 of %b. Both runs share one prefix
; and expect a single ZIP2 on 2 x 64-bit lanes.
103define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
104; CHECK-LABEL: shufflevector_v2i64:
105; CHECK:       // %bb.0:
106; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
107; CHECK-NEXT:    ret
108    %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
109    ret <2 x i64> %c
110}
111
; Pointer-element variant of the <1, 3> shuffle (lane 1 of %a, lane 1 of %b).
; The expected output is the same single ZIP2 on 2 x 64-bit lanes that the
; v2i64 test expects.
112define <2 x ptr> @shufflevector_v2p0(<2 x ptr> %a, <2 x ptr> %b) {
113; CHECK-LABEL: shufflevector_v2p0:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
116; CHECK-NEXT:    ret
117    %c = shufflevector <2 x ptr> %a, <2 x ptr> %b, <2 x i32> <i32 1, i32 3>
118    ret <2 x ptr> %c
119}
120
121; ===== Legal Vector Types with Zero Masks =====
122
; All-zero mask: every result lane is lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single DUP of byte lane 0.
123define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) {
124; CHECK-LABEL: shufflevector_v8i8_zeroes:
125; CHECK:       // %bb.0:
126; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
127; CHECK-NEXT:    dup v0.8b, v0.b[0]
128; CHECK-NEXT:    ret
129    %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
130    ret <8 x i8> %c
131}
132
; All-zero mask: every result lane is lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single 16-byte DUP of byte lane 0.
133define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) {
134; CHECK-LABEL: shufflevector_v16i8_zeroes:
135; CHECK:       // %bb.0:
136; CHECK-NEXT:    dup v0.16b, v0.b[0]
137; CHECK-NEXT:    ret
138    %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
139    ret <16 x i8> %c
140}
141
; All-zero mask: every result lane is lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single DUP of halfword lane 0.
142define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) {
143; CHECK-LABEL: shufflevector_v4i16_zeroes:
144; CHECK:       // %bb.0:
145; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
146; CHECK-NEXT:    dup v0.4h, v0.h[0]
147; CHECK-NEXT:    ret
148    %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
149    ret <4 x i16> %c
150}
151
; All-zero mask: every result lane is lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single 8-lane DUP of halfword lane 0.
152define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) {
153; CHECK-LABEL: shufflevector_v8i16_zeroes:
154; CHECK:       // %bb.0:
155; CHECK-NEXT:    dup v0.8h, v0.h[0]
156; CHECK-NEXT:    ret
157    %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
158    ret <8 x i16> %c
159}
160
; All-zero mask: both result lanes are lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single DUP of word lane 0.
161define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) {
162; CHECK-LABEL: shufflevector_v2i32_zeroes:
163; CHECK:       // %bb.0:
164; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
165; CHECK-NEXT:    dup v0.2s, v0.s[0]
166; CHECK-NEXT:    ret
167    %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0>
168    ret <2 x i32> %c
169}
170
; All-zero mask: every result lane is lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single 4-lane DUP of word lane 0.
171define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) {
172; CHECK-LABEL: shufflevector_v4i32_zeroes:
173; CHECK:       // %bb.0:
174; CHECK-NEXT:    dup v0.4s, v0.s[0]
175; CHECK-NEXT:    ret
176    %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
177    ret <4 x i32> %c
178}
179
; All-zero mask: both result lanes are lane 0 of %a, and %b is never selected.
; Both runs share one prefix and expect a single DUP of doubleword lane 0.
180define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) {
181; CHECK-LABEL: shufflevector_v2i64_zeroes:
182; CHECK:       // %bb.0:
183; CHECK-NEXT:    dup v0.2d, v0.d[0]
184; CHECK-NEXT:    ret
185    %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
186    ret <2 x i64> %c
187}
188
; Pointer-element variant of the all-zero mask (both result lanes are lane 0 of
; %a). The expected output is the same single DUP of doubleword lane 0 that the
; v2i64 zero-mask test expects.
189define <2 x ptr> @shufflevector_v2p0_zeroes(<2 x ptr> %a, <2 x ptr> %b) {
190; CHECK-LABEL: shufflevector_v2p0_zeroes:
191; CHECK:       // %bb.0:
192; CHECK-NEXT:    dup v0.2d, v0.d[0]
193; CHECK-NEXT:    ret
194    %c = shufflevector <2 x ptr> %a, <2 x ptr> %b, <2 x i32> <i32 0, i32 0>
195    ret <2 x ptr> %c
196}
197
198; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
199
; Two-lane i1 shuffle with mask <0, 3>: lane 0 of %a and lane 1 of %b.
; The SD run expects a single 32-bit lane insert from v1 into v0. The GI run
; expects a longer sequence that moves the lanes through byte lanes and
; general-purpose registers before rebuilding the 32-bit lanes.
200define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
201; CHECK-SD-LABEL: shufflevector_v2i1:
202; CHECK-SD:       // %bb.0:
203; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
204; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
205; CHECK-SD-NEXT:    mov v0.s[1], v1.s[1]
206; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
207; CHECK-SD-NEXT:    ret
208;
209; CHECK-GI-LABEL: shufflevector_v2i1:
210; CHECK-GI:       // %bb.0:
211; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
212; CHECK-GI-NEXT:    mov w8, v1.s[1]
213; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
214; CHECK-GI-NEXT:    mov w9, v0.s[1]
215; CHECK-GI-NEXT:    mov v1.b[1], w8
216; CHECK-GI-NEXT:    mov v0.b[1], w9
217; CHECK-GI-NEXT:    mov b1, v1.b[1]
218; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
219; CHECK-GI-NEXT:    umov w8, v0.b[0]
220; CHECK-GI-NEXT:    umov w9, v0.b[1]
221; CHECK-GI-NEXT:    mov v0.s[0], w8
222; CHECK-GI-NEXT:    mov v0.s[1], w9
223; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
224; CHECK-GI-NEXT:    ret
225    %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3>
226    ret <2 x i1> %c
227}
228
; 4 x i8 shuffle with mask <1, 2, 4, 7> (lanes 1 and 2 of %a, lanes 0 and 3 of
; %b); the result is bitcast to i32 and returned in w0.
; The SD run expects an EXT/ZIP1/EXT/UZP1 sequence bracketed by a 16-byte stack
; adjustment that the checked instructions never access. The GI run expects
; both inputs to be narrowed with UZP1, merged, and permuted by a TBL with a
; constant-pool index vector.
229define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
230; CHECK-SD-LABEL: shufflevector_v4i8:
231; CHECK-SD:       // %bb.0:
232; CHECK-SD-NEXT:    sub sp, sp, #16
233; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
234; CHECK-SD-NEXT:    ext v0.8b, v1.8b, v0.8b, #6
235; CHECK-SD-NEXT:    zip1 v1.4h, v1.4h, v0.4h
236; CHECK-SD-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
237; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
238; CHECK-SD-NEXT:    fmov w0, s0
239; CHECK-SD-NEXT:    add sp, sp, #16
240; CHECK-SD-NEXT:    ret
241;
242; CHECK-GI-LABEL: shufflevector_v4i8:
243; CHECK-GI:       // %bb.0:
244; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
245; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
246; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
247; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
248; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI17_0]
249; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
250; CHECK-GI-NEXT:    fmov w0, s0
251; CHECK-GI-NEXT:    ret
252    %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
253    %d = bitcast <4 x i8> %c to i32
254    ret i32 %d
255}
256
; 32 x i8 shuffle whose mask repeats the group (lane k of %a, then three copies
; of lane 0 of %b) for k = 0..7. The result occupies two vector registers, and
; both runs expect one two-register TBL per result register, each with its own
; constant-pool index vector. The runs differ in which registers hold the table
; and the index vectors.
257define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
258; CHECK-SD-LABEL: shufflevector_v32i8:
259; CHECK-SD:       // %bb.0:
260; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
261; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
262; CHECK-SD-NEXT:    adrp x9, .LCPI18_1
263; CHECK-SD-NEXT:    mov v1.16b, v0.16b
264; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
265; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI18_1]
266; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
267; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
268; CHECK-SD-NEXT:    ret
269;
270; CHECK-GI-LABEL: shufflevector_v32i8:
271; CHECK-GI:       // %bb.0:
272; CHECK-GI-NEXT:    mov v3.16b, v0.16b
273; CHECK-GI-NEXT:    adrp x8, .LCPI18_1
274; CHECK-GI-NEXT:    adrp x9, .LCPI18_0
275; CHECK-GI-NEXT:    mov v4.16b, v2.16b
276; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI18_1]
277; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI18_0]
278; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
279; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
280; CHECK-GI-NEXT:    ret
281    %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
282    ret <32 x i8> %c
283}
284
; 2 x i16 shuffle with mask <1, 2> (lane 1 of %a, lane 0 of %b); the result is
; bitcast to i32 and returned in w0.
; The SD run expects an EXT, after which the two lanes are stored to the stack
; as halfwords and reloaded as one word. The GI run expects both inputs to be
; narrowed with UZP1, merged, and permuted by a TBL with a constant-pool index
; vector.
285define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
286; CHECK-SD-LABEL: shufflevector_v2i16:
287; CHECK-SD:       // %bb.0:
288; CHECK-SD-NEXT:    sub sp, sp, #16
289; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
290; CHECK-SD-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
291; CHECK-SD-NEXT:    mov w8, v0.s[1]
292; CHECK-SD-NEXT:    fmov w9, s0
293; CHECK-SD-NEXT:    strh w9, [sp, #12]
294; CHECK-SD-NEXT:    strh w8, [sp, #14]
295; CHECK-SD-NEXT:    ldr w0, [sp, #12]
296; CHECK-SD-NEXT:    add sp, sp, #16
297; CHECK-SD-NEXT:    ret
298;
299; CHECK-GI-LABEL: shufflevector_v2i16:
300; CHECK-GI:       // %bb.0:
301; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
302; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
303; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
304; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
305; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI19_0]
306; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
307; CHECK-GI-NEXT:    fmov w0, s0
308; CHECK-GI-NEXT:    ret
309    %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2>
310    %d = bitcast <2 x i16> %c to i32
311    ret i32 %d
312}
313
; 16 x i16 shuffle whose mask is four groups of (one lane of %a, then three
; copies of lane 0 of %b); the %a lanes used are 0, 1, 1 and 3. The result
; occupies two vector registers, and both runs expect one two-register TBL per
; result register, each with its own constant-pool index vector.
; NOTE(review): the third group repeats lane 1 of %a, whereas the analogous
; v32i8 mask counts 0, 1, 2, ... -- possibly meant to be lane 2. Confirm before
; changing; the assertions below only name the constant-pool labels, not their
; contents.
314define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
315; CHECK-SD-LABEL: shufflevector_v16i16:
316; CHECK-SD:       // %bb.0:
317; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
318; CHECK-SD-NEXT:    adrp x8, .LCPI20_0
319; CHECK-SD-NEXT:    adrp x9, .LCPI20_1
320; CHECK-SD-NEXT:    mov v1.16b, v0.16b
321; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
322; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI20_1]
323; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
324; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
325; CHECK-SD-NEXT:    ret
326;
327; CHECK-GI-LABEL: shufflevector_v16i16:
328; CHECK-GI:       // %bb.0:
329; CHECK-GI-NEXT:    mov v3.16b, v0.16b
330; CHECK-GI-NEXT:    adrp x8, .LCPI20_1
331; CHECK-GI-NEXT:    adrp x9, .LCPI20_0
332; CHECK-GI-NEXT:    mov v4.16b, v2.16b
333; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI20_1]
334; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI20_0]
335; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
336; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
337; CHECK-GI-NEXT:    ret
338    %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
339    ret <16 x i16> %c
340}
341
; Single-lane shuffle with mask <1>, i.e. the only lane of %b. Both runs share
; one prefix and expect a plain register copy from d1 to d0.
342define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
343; CHECK-LABEL: shufflevector_v1i32:
344; CHECK:       // %bb.0:
345; CHECK-NEXT:    fmov d0, d1
346; CHECK-NEXT:    ret
347    %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
348    ret <1 x i32> %c
349}
350
; 8 x i32 shuffle split across two result registers: the low half is the odd
; lanes of %a, the high half is lanes 0, 2, 4 and 7 of %b.
; Both runs expect UZP2 for the low half. For the high half the SD run expects
; UZP1 plus one lane insert and a register move, while the GI run expects a
; two-register TBL with a constant-pool index vector.
351define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
352; CHECK-SD-LABEL: shufflevector_v8i32:
353; CHECK-SD:       // %bb.0:
354; CHECK-SD-NEXT:    uzp1 v2.4s, v2.4s, v3.4s
355; CHECK-SD-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
356; CHECK-SD-NEXT:    mov v2.s[3], v3.s[3]
357; CHECK-SD-NEXT:    mov v1.16b, v2.16b
358; CHECK-SD-NEXT:    ret
359;
360; CHECK-GI-LABEL: shufflevector_v8i32:
361; CHECK-GI:       // %bb.0:
362; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
363; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
364; CHECK-GI-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
365; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI22_0]
366; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
367; CHECK-GI-NEXT:    tbl v1.16b, { v2.16b, v3.16b }, v4.16b
368; CHECK-GI-NEXT:    ret
369    %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
370    ret <8 x i32> %c
371}
372
; 4 x i64 shuffle with mask <1, 3, 5, 7>: the odd lanes of %a, then the odd
; lanes of %b. Both runs expect one ZIP2 per 128-bit half; the SD run computes
; the second half in v2 and needs an extra move into v1, while the GI run
; writes v1 directly.
373define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
374; CHECK-SD-LABEL: shufflevector_v4i64:
375; CHECK-SD:       // %bb.0:
376; CHECK-SD-NEXT:    zip2 v2.2d, v2.2d, v3.2d
377; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
378; CHECK-SD-NEXT:    mov v1.16b, v2.16b
379; CHECK-SD-NEXT:    ret
380;
381; CHECK-GI-LABEL: shufflevector_v4i64:
382; CHECK-GI:       // %bb.0:
383; CHECK-GI-NEXT:    zip2 v0.2d, v0.2d, v1.2d
384; CHECK-GI-NEXT:    zip2 v1.2d, v2.2d, v3.2d
385; CHECK-GI-NEXT:    ret
386    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
387    ret <4 x i64> %c
388}
389
; 3 x ptr shuffle with mask <1, 3, 5>: lane 1 of %a, then lanes 0 and 2 of %b.
; The SD run expects three plain d-register copies (d1, d3 and d5 into d0, d1
; and d2). The GI run expects a longer sequence that packs the elements into
; vector registers via general-purpose registers, combines them with EXT, and
; unpacks the result again.
390define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
391; CHECK-SD-LABEL: shufflevector_v3p0:
392; CHECK-SD:       // %bb.0:
393; CHECK-SD-NEXT:    fmov d2, d5
394; CHECK-SD-NEXT:    fmov d0, d1
395; CHECK-SD-NEXT:    fmov d1, d3
396; CHECK-SD-NEXT:    ret
397;
398; CHECK-GI-LABEL: shufflevector_v3p0:
399; CHECK-GI:       // %bb.0:
400; CHECK-GI-NEXT:    fmov x8, d0
401; CHECK-GI-NEXT:    fmov x9, d3
402; CHECK-GI-NEXT:    mov v0.d[0], x8
403; CHECK-GI-NEXT:    mov v2.d[0], x9
404; CHECK-GI-NEXT:    fmov x8, d1
405; CHECK-GI-NEXT:    fmov x9, d4
406; CHECK-GI-NEXT:    mov v0.d[1], x8
407; CHECK-GI-NEXT:    mov v2.d[1], x9
408; CHECK-GI-NEXT:    fmov x8, d5
409; CHECK-GI-NEXT:    mov v1.d[0], x8
410; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
411; CHECK-GI-NEXT:    fmov x10, d1
412; CHECK-GI-NEXT:    mov d2, v0.d[1]
413; CHECK-GI-NEXT:    fmov d1, d2
414; CHECK-GI-NEXT:    fmov d2, x10
415; CHECK-GI-NEXT:    ret
416    %c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> <i32 1, i32 3, i32 5>
417    ret <3 x ptr> %c
418}
419
; Pointer-element variant of the 4-lane <1, 3, 5, 7> shuffle (odd lanes of %a,
; then odd lanes of %b). Both runs expect one ZIP2 per 128-bit half; the SD run
; needs an extra move into v1, while the GI run writes v1 directly.
420define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
421; CHECK-SD-LABEL: shufflevector_v4p0:
422; CHECK-SD:       // %bb.0:
423; CHECK-SD-NEXT:    zip2 v2.2d, v2.2d, v3.2d
424; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
425; CHECK-SD-NEXT:    mov v1.16b, v2.16b
426; CHECK-SD-NEXT:    ret
427;
428; CHECK-GI-LABEL: shufflevector_v4p0:
429; CHECK-GI:       // %bb.0:
430; CHECK-GI-NEXT:    zip2 v0.2d, v0.2d, v1.2d
431; CHECK-GI-NEXT:    zip2 v1.2d, v2.2d, v3.2d
432; CHECK-GI-NEXT:    ret
433    %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
434    ret <4 x ptr> %c
435}
436
437; ===== Smaller/Larger Width Vectors with Zero Masks =====
438
; Two-lane i1 shuffle with an all-zero mask: both result lanes are lane 0 of
; %a. The SD run expects a single DUP of word lane 0. The GI run expects the
; lanes to be moved into byte lanes, broadcast with a byte DUP, and then
; widened back into 32-bit lanes through general-purpose registers.
439define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
440; CHECK-SD-LABEL: shufflevector_v2i1_zeroes:
441; CHECK-SD:       // %bb.0:
442; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
443; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
444; CHECK-SD-NEXT:    ret
445;
446; CHECK-GI-LABEL: shufflevector_v2i1_zeroes:
447; CHECK-GI:       // %bb.0:
448; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
449; CHECK-GI-NEXT:    mov w8, v0.s[1]
450; CHECK-GI-NEXT:    mov v0.b[1], w8
451; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
452; CHECK-GI-NEXT:    umov w8, v0.b[0]
453; CHECK-GI-NEXT:    umov w9, v0.b[1]
454; CHECK-GI-NEXT:    mov v0.s[0], w8
455; CHECK-GI-NEXT:    mov v0.s[1], w9
456; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
457; CHECK-GI-NEXT:    ret
458    %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0>
459    ret <2 x i1> %c
460}
461
; 4 x i8 shuffle with an all-zero mask (every lane is lane 0 of %a); the result
; is bitcast to i32 and returned in w0.
; The SD run expects a halfword DUP followed by a UZP1 narrowing, bracketed by
; a 16-byte stack adjustment that the checked instructions never access. The
; GI run expects the UZP1 narrowing first and then a byte DUP.
462define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
463; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
464; CHECK-SD:       // %bb.0:
465; CHECK-SD-NEXT:    sub sp, sp, #16
466; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
467; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
468; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
469; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
470; CHECK-SD-NEXT:    fmov w0, s0
471; CHECK-SD-NEXT:    add sp, sp, #16
472; CHECK-SD-NEXT:    ret
473;
474; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
475; CHECK-GI:       // %bb.0:
476; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
477; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
478; CHECK-GI-NEXT:    fmov w0, s0
479; CHECK-GI-NEXT:    ret
480    %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
481    %d = bitcast <4 x i8> %c to i32
482    ret i32 %d
483}
484
; 32 x i8 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a 16-byte DUP into v0 followed by a
; copy of v0 into v1, the second result register.
485define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
486; CHECK-LABEL: shufflevector_v32i8_zeroes:
487; CHECK:       // %bb.0:
488; CHECK-NEXT:    dup v0.16b, v0.b[0]
489; CHECK-NEXT:    mov v1.16b, v0.16b
490; CHECK-NEXT:    ret
491    %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
492    ret <32 x i8> %c
493}
494
; 2 x i16 shuffle with an all-zero mask (both lanes are lane 0 of %a); the
; result is bitcast to i32 and returned in w0.
; The SD run expects a word DUP, after which the two lanes are stored to the
; stack as halfwords and reloaded as one word. The GI run expects a UZP1
; narrowing followed by a halfword DUP.
495define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
496; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
497; CHECK-SD:       // %bb.0:
498; CHECK-SD-NEXT:    sub sp, sp, #16
499; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
500; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
501; CHECK-SD-NEXT:    dup v1.2s, v0.s[0]
502; CHECK-SD-NEXT:    fmov w9, s0
503; CHECK-SD-NEXT:    strh w9, [sp, #12]
504; CHECK-SD-NEXT:    mov w8, v1.s[1]
505; CHECK-SD-NEXT:    strh w8, [sp, #14]
506; CHECK-SD-NEXT:    ldr w0, [sp, #12]
507; CHECK-SD-NEXT:    add sp, sp, #16
508; CHECK-SD-NEXT:    ret
509;
510; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
511; CHECK-GI:       // %bb.0:
512; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
513; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
514; CHECK-GI-NEXT:    fmov w0, s0
515; CHECK-GI-NEXT:    ret
516    %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0>
517    %d = bitcast <2 x i16> %c to i32
518    ret i32 %d
519}
520
; 16 x i16 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect an 8-lane halfword DUP into v0 followed
; by a copy of v0 into v1, the second result register.
521define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){
522; CHECK-LABEL: shufflevector_v16i16_zeroes:
523; CHECK:       // %bb.0:
524; CHECK-NEXT:    dup v0.8h, v0.h[0]
525; CHECK-NEXT:    mov v1.16b, v0.16b
526; CHECK-NEXT:    ret
527    %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
528    ret <16 x i16> %c
529}
530
; Single-lane shuffle with mask <0>, i.e. the only lane of %a. Both runs share
; one prefix and expect no instructions other than the return.
531define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) {
532; CHECK-LABEL: shufflevector_v1i32_zeroes:
533; CHECK:       // %bb.0:
534; CHECK-NEXT:    ret
535    %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 0>
536    ret <1 x i32> %c
537}
538
; 8 x i32 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a 4-lane word DUP into v0 followed by a
; copy of v0 into v1, the second result register.
539define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) {
540; CHECK-LABEL: shufflevector_v8i32_zeroes:
541; CHECK:       // %bb.0:
542; CHECK-NEXT:    dup v0.4s, v0.s[0]
543; CHECK-NEXT:    mov v1.16b, v0.16b
544; CHECK-NEXT:    ret
545    %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
546    ret <8 x i32> %c
547}
548
; 4 x i64 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a doubleword DUP into v0 followed by a
; copy of v0 into v1, the second result register.
549define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) {
550; CHECK-LABEL: shufflevector_v4i64_zeroes:
551; CHECK:       // %bb.0:
552; CHECK-NEXT:    dup v0.2d, v0.d[0]
553; CHECK-NEXT:    mov v1.16b, v0.16b
554; CHECK-NEXT:    ret
555    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
556    ret <4 x i64> %c
557}
558
; Pointer-element variant of the 4-lane all-zero mask (every result lane is
; lane 0 of %a). Both runs share one prefix and expect a doubleword DUP into v0
; followed by a copy of v0 into v1, the second result register.
559define <4 x ptr> @shufflevector_v4p0_zeroes(<4 x ptr> %a, <4 x ptr> %b) {
560; CHECK-LABEL: shufflevector_v4p0_zeroes:
561; CHECK:       // %bb.0:
562; CHECK-NEXT:    dup v0.2d, v0.d[0]
563; CHECK-NEXT:    mov v1.16b, v0.16b
564; CHECK-NEXT:    ret
565    %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
566    ret <4 x ptr> %c
567}
568
569; ===== Vectors with Non-Pow 2 Widths =====
570
; 3 x i8 shuffle with mask <1, 2, 4>: lanes 1 and 2 of %a, then lane 1 of %b.
; The SD run expects three general-purpose register moves (w1, w2 and w4 into
; w0, w1 and w2). The GI run expects both inputs to be assembled into vector
; registers from w0..w5, permuted by a TBL with a constant-pool index vector,
; and the three result bytes extracted back into w0..w2.
571define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
572; CHECK-SD-LABEL: shufflevector_v3i8:
573; CHECK-SD:       // %bb.0:
574; CHECK-SD-NEXT:    mov w0, w1
575; CHECK-SD-NEXT:    mov w1, w2
576; CHECK-SD-NEXT:    mov w2, w4
577; CHECK-SD-NEXT:    ret
578;
579; CHECK-GI-LABEL: shufflevector_v3i8:
580; CHECK-GI:       // %bb.0:
581; CHECK-GI-NEXT:    fmov s0, w0
582; CHECK-GI-NEXT:    fmov s1, w3
583; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
584; CHECK-GI-NEXT:    mov v0.b[1], w1
585; CHECK-GI-NEXT:    mov v1.b[1], w4
586; CHECK-GI-NEXT:    mov v0.b[2], w2
587; CHECK-GI-NEXT:    mov v1.b[2], w5
588; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
589; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
590; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
591; CHECK-GI-NEXT:    umov w0, v0.b[0]
592; CHECK-GI-NEXT:    umov w1, v0.b[1]
593; CHECK-GI-NEXT:    umov w2, v0.b[2]
594; CHECK-GI-NEXT:    ret
595    %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 1, i32 2, i32 4>
596    ret <3 x i8> %c
597}
598
; 7 x i8 shuffle with mask <1, 3, 5, 7, 8, 10, 12>: lanes 1, 3 and 5 of %a,
; then lanes 0, 1, 3 and 5 of %b. Both runs expect the inputs to be merged into
; one 128-bit register and permuted by TBL with a constant-pool index vector;
; the SD run uses the 8-byte TBL form, the GI run the 16-byte form.
599define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
600; CHECK-SD-LABEL: shufflevector_v7i8:
601; CHECK-SD:       // %bb.0:
602; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
603; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
604; CHECK-SD-NEXT:    adrp x8, .LCPI36_0
605; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
606; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
607; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
608; CHECK-SD-NEXT:    ret
609;
610; CHECK-GI-LABEL: shufflevector_v7i8:
611; CHECK-GI:       // %bb.0:
612; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
613; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
614; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
615; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
616; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
617; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
618; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
619; CHECK-GI-NEXT:    ret
620    %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
621    ret <7 x i8> %c
622}
623
; 3 x i16 shuffle with mask <1, 2, 4>: lanes 1 and 2 of %a, then lane 1 of %b.
; The SD run expects a ZIP1 followed by a ZIP2 on 4 x 16-bit lanes. The GI run
; expects the inputs to be merged into one 128-bit register and permuted by a
; TBL with a constant-pool index vector.
624define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
625; CHECK-SD-LABEL: shufflevector_v3i16:
626; CHECK-SD:       // %bb.0:
627; CHECK-SD-NEXT:    zip1 v1.4h, v0.4h, v1.4h
628; CHECK-SD-NEXT:    zip2 v0.4h, v1.4h, v0.4h
629; CHECK-SD-NEXT:    ret
630;
631; CHECK-GI-LABEL: shufflevector_v3i16:
632; CHECK-GI:       // %bb.0:
633; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
634; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
635; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
636; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
637; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI37_0]
638; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
639; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
640; CHECK-GI-NEXT:    ret
641    %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4>
642    ret <3 x i16> %c
643}
644
; 7 x i16 shuffle with mask <1, 3, 5, 7, 8, 10, 12>: lanes 1, 3 and 5 of %a,
; then lanes 0, 1, 3 and 5 of %b. Both runs expect a two-register byte TBL with
; the index vector loaded from the constant pool; the SD and GI sequences
; differ only in the order of the register-kill annotations.
645define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
646; CHECK-SD-LABEL: shufflevector_v7i16:
647; CHECK-SD:       // %bb.0:
648; CHECK-SD-NEXT:    adrp x8, .LCPI38_0
649; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
650; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
651; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
652; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
653; CHECK-SD-NEXT:    ret
654;
655; CHECK-GI-LABEL: shufflevector_v7i16:
656; CHECK-GI:       // %bb.0:
657; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
658; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
659; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
660; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
661; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
662; CHECK-GI-NEXT:    ret
663    %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
664    ret <7 x i16> %c
665}
666
; 3 x i32 shuffle with mask <1, 2, 4>: lanes 1 and 2 of %a, then lane 1 of %b.
; The SD run expects a ZIP1 followed by a ZIP2 on 4 x 32-bit lanes. The GI run
; expects a two-register byte TBL with a constant-pool index vector.
667define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
668; CHECK-SD-LABEL: shufflevector_v3i32:
669; CHECK-SD:       // %bb.0:
670; CHECK-SD-NEXT:    zip1 v1.4s, v0.4s, v1.4s
671; CHECK-SD-NEXT:    zip2 v0.4s, v1.4s, v0.4s
672; CHECK-SD-NEXT:    ret
673;
674; CHECK-GI-LABEL: shufflevector_v3i32:
675; CHECK-GI:       // %bb.0:
676; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
677; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
678; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI39_0]
679; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
680; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
681; CHECK-GI-NEXT:    ret
682    %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
683    ret <3 x i32> %c
684}
685
686; ===== Vectors with Non-Pow 2 Widths with Zero Masks =====
687
; 3 x i8 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; The SD run expects w0 to be copied into w1 and w2. The GI run expects %a to
; be assembled into a vector register from w0..w2, broadcast with a byte DUP,
; and the three result bytes extracted back into w0..w2.
688define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) {
689; CHECK-SD-LABEL: shufflevector_v3i8_zeroes:
690; CHECK-SD:       // %bb.0:
691; CHECK-SD-NEXT:    mov w1, w0
692; CHECK-SD-NEXT:    mov w2, w0
693; CHECK-SD-NEXT:    ret
694;
695; CHECK-GI-LABEL: shufflevector_v3i8_zeroes:
696; CHECK-GI:       // %bb.0:
697; CHECK-GI-NEXT:    fmov s0, w0
698; CHECK-GI-NEXT:    mov v0.b[1], w1
699; CHECK-GI-NEXT:    mov v0.b[2], w2
700; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
701; CHECK-GI-NEXT:    umov w0, v0.b[0]
702; CHECK-GI-NEXT:    umov w1, v0.b[1]
703; CHECK-GI-NEXT:    umov w2, v0.b[2]
704; CHECK-GI-NEXT:    ret
705    %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 0, i32 0, i32 0>
706    ret <3 x i8> %c
707}
708
; 7 x i8 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a single 8-lane DUP of byte lane 0.
709define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
710; CHECK-LABEL: shufflevector_v7i8_zeroes:
711; CHECK:       // %bb.0:
712; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
713; CHECK-NEXT:    dup v0.8b, v0.b[0]
714; CHECK-NEXT:    ret
715    %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
716    ret <7 x i8> %c
717}
718
; 3 x i16 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a single 4-lane DUP of halfword lane 0.
719define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) {
720; CHECK-LABEL: shufflevector_v3i16_zeroes:
721; CHECK:       // %bb.0:
722; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
723; CHECK-NEXT:    dup v0.4h, v0.h[0]
724; CHECK-NEXT:    ret
725    %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0>
726    ret <3 x i16> %c
727}
728
; 7 x i16 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a single 8-lane DUP of halfword lane 0.
729define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) {
730; CHECK-LABEL: shufflevector_v7i16_zeroes:
731; CHECK:       // %bb.0:
732; CHECK-NEXT:    dup v0.8h, v0.h[0]
733; CHECK-NEXT:    ret
734    %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
735    ret <7 x i16> %c
736}
737
; 3 x i32 shuffle with an all-zero mask: every result lane is lane 0 of %a.
; Both runs share one prefix and expect a single 4-lane DUP of word lane 0.
738define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) {
739; CHECK-LABEL: shufflevector_v3i32_zeroes:
740; CHECK:       // %bb.0:
741; CHECK-NEXT:    dup v0.4s, v0.s[0]
742; CHECK-NEXT:    ret
743    %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 0, i32 0, i32 0>
744    ret <3 x i32> %c
745}
746