xref: /llvm-project/llvm/test/CodeGen/AArch64/shuffles.ll (revision f6947e479e14e7904aa0b2539a95f5dfdc8f9295)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECKLE
3; RUN: llc < %s -mtriple=aarch64_be--linux-gnu | FileCheck %s --check-prefix=CHECKBE
4
; test_shuf1: two-input shuffle of <16 x i32> vectors with an irregular
; 16-element mask. Mask indices 0-15 select lanes of %x and 16-31 select
; lanes of %y. The four 4-element quarters of this mask are the masks used
; individually by test_shuf2, test_shuf3, test_shuf4 and test_shuf5.
5define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
6; CHECKLE-LABEL: test_shuf1:
7; CHECKLE:       // %bb.0:
8; CHECKLE-NEXT:    ext v3.16b, v6.16b, v1.16b, #4
9; CHECKLE-NEXT:    uzp1 v5.4s, v1.4s, v0.4s
10; CHECKLE-NEXT:    uzp2 v16.4s, v2.4s, v4.4s
11; CHECKLE-NEXT:    dup v17.4s, v4.s[0]
12; CHECKLE-NEXT:    trn2 v4.4s, v1.4s, v3.4s
13; CHECKLE-NEXT:    mov v17.s[0], v6.s[3]
14; CHECKLE-NEXT:    trn2 v1.4s, v5.4s, v1.4s
15; CHECKLE-NEXT:    rev64 v3.4s, v7.4s
16; CHECKLE-NEXT:    trn1 v2.4s, v16.4s, v2.4s
17; CHECKLE-NEXT:    mov v4.s[0], v7.s[1]
18; CHECKLE-NEXT:    ext v1.16b, v0.16b, v1.16b, #12
19; CHECKLE-NEXT:    mov v3.d[0], v17.d[0]
20; CHECKLE-NEXT:    mov v2.s[3], v7.s[0]
21; CHECKLE-NEXT:    mov v0.16b, v4.16b
22; CHECKLE-NEXT:    ret
23;
24; CHECKBE-LABEL: test_shuf1:
25; CHECKBE:       // %bb.0:
26; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
27; CHECKBE-NEXT:    rev64 v3.4s, v6.4s
28; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
29; CHECKBE-NEXT:    rev64 v2.4s, v2.4s
30; CHECKBE-NEXT:    rev64 v4.4s, v4.4s
31; CHECKBE-NEXT:    rev64 v5.4s, v7.4s
32; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
33; CHECKBE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
34; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
35; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
36; CHECKBE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
37; CHECKBE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
38; CHECKBE-NEXT:    ext v6.16b, v3.16b, v1.16b, #4
39; CHECKBE-NEXT:    uzp1 v16.4s, v1.4s, v0.4s
40; CHECKBE-NEXT:    uzp2 v7.4s, v2.4s, v4.4s
41; CHECKBE-NEXT:    dup v4.4s, v4.s[0]
42; CHECKBE-NEXT:    rev64 v17.4s, v5.4s
43; CHECKBE-NEXT:    trn2 v6.4s, v1.4s, v6.4s
44; CHECKBE-NEXT:    mov v4.s[0], v3.s[3]
45; CHECKBE-NEXT:    trn2 v1.4s, v16.4s, v1.4s
46; CHECKBE-NEXT:    trn1 v2.4s, v7.4s, v2.4s
47; CHECKBE-NEXT:    rev64 v3.4s, v17.4s
48; CHECKBE-NEXT:    mov v6.s[0], v5.s[1]
49; CHECKBE-NEXT:    rev64 v4.4s, v4.4s
50; CHECKBE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
51; CHECKBE-NEXT:    mov v2.s[3], v5.s[0]
52; CHECKBE-NEXT:    rev64 v1.4s, v6.4s
53; CHECKBE-NEXT:    mov v3.d[0], v4.d[0]
54; CHECKBE-NEXT:    rev64 v4.4s, v0.4s
55; CHECKBE-NEXT:    rev64 v2.4s, v2.4s
56; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
57; CHECKBE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
58; CHECKBE-NEXT:    ext v1.16b, v4.16b, v4.16b, #8
59; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
60; CHECKBE-NEXT:    ret
61  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 29, i32 26, i32 7, i32 4, i32 3, i32 6, i32 5, i32 2, i32 9, i32 8, i32 17, i32 28, i32 27, i32 16, i32 31, i32 30>
62  ret <16 x i32> %s3
63}
64
; test_shuf2: narrowing shuffle from two <16 x i32> inputs to <4 x i32>.
; Mask <29, 26, 7, 4> selects y[13], y[10], x[7], x[4]; it is the first
; quarter of the mask used in test_shuf1.
65define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
66; CHECKLE-LABEL: test_shuf2:
67; CHECKLE:       // %bb.0:
68; CHECKLE-NEXT:    zip2 v0.4s, v7.4s, v6.4s
69; CHECKLE-NEXT:    trn2 v2.4s, v7.4s, v0.4s
70; CHECKLE-NEXT:    ext v0.16b, v1.16b, v1.16b, #4
71; CHECKLE-NEXT:    mov v0.d[0], v2.d[0]
72; CHECKLE-NEXT:    ret
73;
74; CHECKBE-LABEL: test_shuf2:
75; CHECKBE:       // %bb.0:
76; CHECKBE-NEXT:    rev64 v0.4s, v6.4s
77; CHECKBE-NEXT:    rev64 v2.4s, v7.4s
78; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
79; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
80; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
81; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
82; CHECKBE-NEXT:    zip2 v0.4s, v2.4s, v0.4s
83; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
84; CHECKBE-NEXT:    trn2 v0.4s, v2.4s, v0.4s
85; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
86; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
87; CHECKBE-NEXT:    mov v1.d[0], v0.d[0]
88; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
89; CHECKBE-NEXT:    ret
90  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 29, i32 26, i32 7, i32 4>
91  ret <4 x i32> %s3
92}
93
; test_shuf3: narrowing shuffle from two <16 x i32> inputs to <4 x i32>.
; Mask <3, 6, 5, 2> selects x[3], x[6], x[5], x[2], so only %x is read;
; it is the second quarter of the mask used in test_shuf1.
94define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
95; CHECKLE-LABEL: test_shuf3:
96; CHECKLE:       // %bb.0:
97; CHECKLE-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
98; CHECKLE-NEXT:    trn2 v1.4s, v2.4s, v1.4s
99; CHECKLE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
100; CHECKLE-NEXT:    ret
101;
102; CHECKBE-LABEL: test_shuf3:
103; CHECKBE:       // %bb.0:
104; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
105; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
106; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
107; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
108; CHECKBE-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
109; CHECKBE-NEXT:    trn2 v1.4s, v2.4s, v1.4s
110; CHECKBE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
111; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
112; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
113; CHECKBE-NEXT:    ret
114  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 3, i32 6, i32 5, i32 2>
115  ret <4 x i32> %s3
116}
117
; test_shuf4: narrowing shuffle from two <16 x i32> inputs to <4 x i32>.
; Mask <9, 8, 17, 28> selects x[9], x[8], y[1], y[12]; it is the third
; quarter of the mask used in test_shuf1.
118define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
119; CHECKLE-LABEL: test_shuf4:
120; CHECKLE:       // %bb.0:
121; CHECKLE-NEXT:    uzp2 v0.4s, v2.4s, v4.4s
122; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v2.4s
123; CHECKLE-NEXT:    mov v0.s[3], v7.s[0]
124; CHECKLE-NEXT:    ret
125;
126; CHECKBE-LABEL: test_shuf4:
127; CHECKBE:       // %bb.0:
128; CHECKBE-NEXT:    rev64 v0.4s, v4.4s
129; CHECKBE-NEXT:    rev64 v1.4s, v2.4s
130; CHECKBE-NEXT:    rev64 v2.4s, v7.4s
131; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
132; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
133; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
134; CHECKBE-NEXT:    uzp2 v0.4s, v1.4s, v0.4s
135; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v1.4s
136; CHECKBE-NEXT:    mov v0.s[3], v2.s[0]
137; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
138; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
139; CHECKBE-NEXT:    ret
140  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 9, i32 8, i32 17, i32 28>
141  ret <4 x i32> %s3
142}
143
; test_shuf5: narrowing shuffle from two <16 x i32> inputs to <4 x i32>.
; Mask <27, 16, 31, 30> selects y[11], y[0], y[15], y[14], so only %y is
; read; it is the fourth quarter of the mask used in test_shuf1.
144define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
145; CHECKLE-LABEL: test_shuf5:
146; CHECKLE:       // %bb.0:
147; CHECKLE-NEXT:    ext v1.16b, v6.16b, v4.16b, #12
148; CHECKLE-NEXT:    rev64 v0.4s, v7.4s
149; CHECKLE-NEXT:    mov v0.d[0], v1.d[0]
150; CHECKLE-NEXT:    ret
151;
152; CHECKBE-LABEL: test_shuf5:
153; CHECKBE:       // %bb.0:
154; CHECKBE-NEXT:    rev64 v0.4s, v7.4s
155; CHECKBE-NEXT:    rev64 v1.4s, v4.4s
156; CHECKBE-NEXT:    rev64 v2.4s, v6.4s
157; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
158; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
159; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
160; CHECKBE-NEXT:    ext v1.16b, v2.16b, v1.16b, #12
161; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
162; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
163; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
164; CHECKBE-NEXT:    mov v0.d[0], v1.d[0]
165; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
166; CHECKBE-NEXT:    ret
167  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 27, i32 16, i32 31, i32 30>
168  ret <4 x i32> %s3
169}
170
; test1503: two-input <4 x i32> shuffle; the digits in the function name
; spell the mask <1, 5, 0, 3>, which selects a[1], b[1], a[0], a[3].
171define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
172; CHECKLE-LABEL: test1503:
173; CHECKLE:       // %bb.0:
174; CHECKLE-NEXT:    zip1 v1.4s, v0.4s, v1.4s
175; CHECKLE-NEXT:    ext v1.16b, v1.16b, v0.16b, #8
176; CHECKLE-NEXT:    mov v1.s[3], v0.s[3]
177; CHECKLE-NEXT:    mov v0.16b, v1.16b
178; CHECKLE-NEXT:    ret
179;
180; CHECKBE-LABEL: test1503:
181; CHECKBE:       // %bb.0:
182; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
183; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
184; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
185; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
186; CHECKBE-NEXT:    zip1 v1.4s, v0.4s, v1.4s
187; CHECKBE-NEXT:    ext v1.16b, v1.16b, v0.16b, #8
188; CHECKBE-NEXT:    mov v1.s[3], v0.s[3]
189; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
190; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
191; CHECKBE-NEXT:    ret
192{
193  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 3>
194  ret <4 x i32> %r
195}
196
; test4366: two-input <4 x i32> shuffle; the digits in the function name
; spell the mask <4, 3, 6, 6>, which selects b[0], a[3], b[2], b[2].
197define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
198; CHECKLE-LABEL: test4366:
199; CHECKLE:       // %bb.0:
200; CHECKLE-NEXT:    trn1 v1.4s, v1.4s, v1.4s
201; CHECKLE-NEXT:    mov v1.s[1], v0.s[3]
202; CHECKLE-NEXT:    mov v0.16b, v1.16b
203; CHECKLE-NEXT:    ret
204;
205; CHECKBE-LABEL: test4366:
206; CHECKBE:       // %bb.0:
207; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
208; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
209; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
210; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
211; CHECKBE-NEXT:    trn1 v1.4s, v1.4s, v1.4s
212; CHECKBE-NEXT:    mov v1.s[1], v0.s[3]
213; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
214; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
215; CHECKBE-NEXT:    ret
216{
217  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
218  ret <4 x i32> %r
219}
220
; test7367: two-input <4 x i32> shuffle; the digits in the function name
; spell the mask <7, 3, 6, 7>, which selects b[3], a[3], b[2], b[3].
221define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
222; CHECKLE-LABEL: test7367:
223; CHECKLE:       // %bb.0:
224; CHECKLE-NEXT:    mov v2.16b, v1.16b
225; CHECKLE-NEXT:    mov v2.d[0], v0.d[1]
226; CHECKLE-NEXT:    mov v2.s[0], v1.s[3]
227; CHECKLE-NEXT:    mov v0.16b, v2.16b
228; CHECKLE-NEXT:    ret
229;
230; CHECKBE-LABEL: test7367:
231; CHECKBE:       // %bb.0:
232; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
233; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
234; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
235; CHECKBE-NEXT:    mov v2.d[0], v0.d[1]
236; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
237; CHECKBE-NEXT:    rev64 v1.4s, v2.4s
238; CHECKBE-NEXT:    mov v1.s[0], v0.s[3]
239; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
240; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
241; CHECKBE-NEXT:    ret
242{
243  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 3, i32 6, i32 7>
244  ret <4 x i32> %r
245}
246
; test4045: two-input <4 x i32> shuffle; the digits in the function name
; spell the mask <4, 0, 4, 5>, which selects b[0], a[0], b[0], b[1].
247define <4 x i32> @test4045(<4 x i32> %a, <4 x i32> %b)
248; CHECKLE-LABEL: test4045:
249; CHECKLE:       // %bb.0:
250; CHECKLE-NEXT:    trn1 v0.4s, v1.4s, v0.4s
251; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
252; CHECKLE-NEXT:    ret
253;
254; CHECKBE-LABEL: test4045:
255; CHECKBE:       // %bb.0:
256; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
257; CHECKBE-NEXT:    rev64 v2.4s, v1.4s
258; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
259; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
260; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
261; CHECKBE-NEXT:    trn1 v0.4s, v2.4s, v0.4s
262; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
263; CHECKBE-NEXT:    mov v0.d[1], v1.d[0]
264; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
265; CHECKBE-NEXT:    ret
266{
267  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 4, i32 5>
268  ret <4 x i32> %r
269}
270
; test0067: two-input <4 x i32> shuffle; the digits in the function name
; spell the mask <0, 0, 6, 7>, which selects a[0], a[0], b[2], b[3].
271define <4 x i32> @test0067(<4 x i32> %a, <4 x i32> %b)
272; CHECKLE-LABEL: test0067:
273; CHECKLE:       // %bb.0:
274; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
275; CHECKLE-NEXT:    mov v0.d[1], v1.d[1]
276; CHECKLE-NEXT:    ret
277;
278; CHECKBE-LABEL: test0067:
279; CHECKBE:       // %bb.0:
280; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
281; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
282; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
283; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
284; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
285; CHECKBE-NEXT:    mov v0.d[1], v1.d[1]
286; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
287; CHECKBE-NEXT:    ret
288{
289  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 7>
290  ret <4 x i32> %r
291}
292
; test_shuf6: two-input <4 x i32> shuffle with mask <0, 0, 7, 7>, which
; selects a[0], a[0], b[3], b[3] (each chosen lane is duplicated).
293define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
294; CHECKLE-LABEL: test_shuf6:
295; CHECKLE:       // %bb.0:
296; CHECKLE-NEXT:    mov v0.s[2], v1.s[3]
297; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
298; CHECKLE-NEXT:    ret
299;
300; CHECKBE-LABEL: test_shuf6:
301; CHECKBE:       // %bb.0:
302; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
303; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
304; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
305; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
306; CHECKBE-NEXT:    mov v0.s[2], v1.s[3]
307; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
308; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
309; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
310; CHECKBE-NEXT:    ret
311{
312  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
313  ret <4 x i32> %r
314}
315
; test_shuf7: same mask as test_shuf6, <0, 0, 7, 7>, applied to 64-bit
; <4 x i16> vectors; selects a[0], a[0], b[3], b[3].
316define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
317; CHECKLE-LABEL: test_shuf7:
318; CHECKLE:       // %bb.0:
319; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
320; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
321; CHECKLE-NEXT:    mov v0.h[2], v1.h[3]
322; CHECKLE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
323; CHECKLE-NEXT:    ret
324;
325; CHECKBE-LABEL: test_shuf7:
326; CHECKBE:       // %bb.0:
327; CHECKBE-NEXT:    rev64 v1.4h, v1.4h
328; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
329; CHECKBE-NEXT:    mov v0.h[2], v1.h[3]
330; CHECKBE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
331; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
332; CHECKBE-NEXT:    ret
333{
334  %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
335  ret <4 x i16> %r
336}
337
; test_shuf8: two-input <8 x i8> shuffle with mask <0,0,0,0,8,8,8,8>:
; lane 0 of %a repeated four times followed by lane 0 of %b repeated four
; times. The expected output below uses a tbl lookup with the index vector
; loaded from the constant pool.
338define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
339; CHECKLE-LABEL: test_shuf8:
340; CHECKLE:       // %bb.0:
341; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
342; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
343; CHECKLE-NEXT:    adrp x8, .LCPI12_0
344; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
345; CHECKLE-NEXT:    ldr d1, [x8, :lo12:.LCPI12_0]
346; CHECKLE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
347; CHECKLE-NEXT:    ret
348;
349; CHECKBE-LABEL: test_shuf8:
350; CHECKBE:       // %bb.0:
351; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
352; CHECKBE-NEXT:    rev64 v1.8b, v1.8b
353; CHECKBE-NEXT:    adrp x8, .LCPI12_0
354; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI12_0
355; CHECKBE-NEXT:    mov v0.d[1], v1.d[0]
356; CHECKBE-NEXT:    ld1 { v1.8b }, [x8]
357; CHECKBE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
358; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
359; CHECKBE-NEXT:    ret
360{
361  %r = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
362  ret <8 x i8> %r
363}
364
; test_shuf9: two-input <8 x i16> shuffle with mask <0,0,0,0,8,8,8,8>:
; lane 0 of %a repeated four times followed by lane 0 of %b repeated four
; times. The expected output below uses a two-register tbl lookup with the
; index vector loaded from the constant pool.
365define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
366; CHECKLE-LABEL: test_shuf9:
367; CHECKLE:       // %bb.0:
368; CHECKLE-NEXT:    adrp x8, .LCPI13_0
369; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
370; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
371; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
372; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
373; CHECKLE-NEXT:    ret
374;
375; CHECKBE-LABEL: test_shuf9:
376; CHECKBE:       // %bb.0:
377; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
378; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
379; CHECKBE-NEXT:    adrp x8, .LCPI13_0
380; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI13_0
381; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
382; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
383; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
384; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
385; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
386; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
387; CHECKBE-NEXT:    ret
388{
389  %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
390  ret <8 x i16> %r
391}
392
; test_shuf10: <16 x i8> shuffle whose mask is eight 0s followed by eight
; 8s. Every index is below 16, so all selected lanes come from %a (a[0]
; eight times, then a[8] eight times) and %b is not referenced by the mask.
; The expected output below uses a single-register tbl lookup.
393define <16 x i8> @test_shuf10(<16 x i8> %a, <16 x i8> %b)
394; CHECKLE-LABEL: test_shuf10:
395; CHECKLE:       // %bb.0:
396; CHECKLE-NEXT:    adrp x8, .LCPI14_0
397; CHECKLE-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
398; CHECKLE-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
399; CHECKLE-NEXT:    ret
400;
401; CHECKBE-LABEL: test_shuf10:
402; CHECKBE:       // %bb.0:
403; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
404; CHECKBE-NEXT:    adrp x8, .LCPI14_0
405; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI14_0
406; CHECKBE-NEXT:    ld1 { v1.16b }, [x8]
407; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
408; CHECKBE-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
409; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
410; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
411; CHECKBE-NEXT:    ret
412{
413  %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32     8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
414  ret <16 x i8> %r
415}
416
; test_shuf11: two-input <8 x half> shuffle with mask <0,0,0,0,8,8,8,8>
; (the same mask as test_shuf9, on a floating-point element type): a[0]
; four times followed by b[0] four times.
417define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
418; CHECKLE-LABEL: test_shuf11:
419; CHECKLE:       // %bb.0:
420; CHECKLE-NEXT:    adrp x8, .LCPI15_0
421; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
422; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
423; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
424; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
425; CHECKLE-NEXT:    ret
426;
427; CHECKBE-LABEL: test_shuf11:
428; CHECKBE:       // %bb.0:
429; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
430; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
431; CHECKBE-NEXT:    adrp x8, .LCPI15_0
432; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI15_0
433; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
434; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
435; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
436; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
437; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
438; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
439; CHECKBE-NEXT:    ret
440{
441  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
442  ret <8 x half> %r
443}
444
; test_shuf12: two-input <8 x half> shuffle with mask
; <0, 0, 0, 0, 0, 8, 1, 15>: a[0] five times, then b[0], a[1], b[7].
445define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
446; CHECKLE-LABEL: test_shuf12:
447; CHECKLE:       // %bb.0:
448; CHECKLE-NEXT:    adrp x8, .LCPI16_0
449; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
450; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
451; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
452; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
453; CHECKLE-NEXT:    ret
454;
455; CHECKBE-LABEL: test_shuf12:
456; CHECKBE:       // %bb.0:
457; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
458; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
459; CHECKBE-NEXT:    adrp x8, .LCPI16_0
460; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI16_0
461; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
462; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
463; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
464; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
465; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
466; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
467; CHECKBE-NEXT:    ret
468{
469  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
470  ret <8 x half> %r
471}
472
; test_shuf13: two-input <8 x half> shuffle with mask
; <1, 2, 0, 0, 0, 8, 1, 15>: a[1], a[2], a[0], a[0], a[0], b[0], a[1], b[7].
; Differs from test_shuf12 only in the first two mask elements.
473define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
474; CHECKLE-LABEL: test_shuf13:
475; CHECKLE:       // %bb.0:
476; CHECKLE-NEXT:    adrp x8, .LCPI17_0
477; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
478; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
479; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
480; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
481; CHECKLE-NEXT:    ret
482;
483; CHECKBE-LABEL: test_shuf13:
484; CHECKBE:       // %bb.0:
485; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
486; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
487; CHECKBE-NEXT:    adrp x8, .LCPI17_0
488; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI17_0
489; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
490; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
491; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
492; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
493; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
494; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
495; CHECKBE-NEXT:    ret
496{
497  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
498  ret <8 x half> %r
499}
500
; test_shuf14: two-input <8 x half> shuffle with mask
; <1, 2, 1, 1, 0, 8, 1, 15>: a[1], a[2], a[1], a[1], a[0], b[0], a[1], b[7].
501define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
502; CHECKLE-LABEL: test_shuf14:
503; CHECKLE:       // %bb.0:
504; CHECKLE-NEXT:    adrp x8, .LCPI18_0
505; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
506; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
507; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
508; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
509; CHECKLE-NEXT:    ret
510;
511; CHECKBE-LABEL: test_shuf14:
512; CHECKBE:       // %bb.0:
513; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
514; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
515; CHECKBE-NEXT:    adrp x8, .LCPI18_0
516; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI18_0
517; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
518; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
519; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
520; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
521; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
522; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
523; CHECKBE-NEXT:    ret
524{
525  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 1, i32 1, i32 0, i32 8, i32 1, i32 15>
526  ret <8 x half> %r
527}
528
; test_shuf15: two-input <8 x half> shuffle with mask
; <1, 2, 7, 2, 0, 3, 2, 15>: a[1], a[2], a[7], a[2], a[0], a[3], a[2], b[7].
; Only the last result lane is taken from %b.
529define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
530; CHECKLE-LABEL: test_shuf15:
531; CHECKLE:       // %bb.0:
532; CHECKLE-NEXT:    adrp x8, .LCPI19_0
533; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
534; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
535; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
536; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
537; CHECKLE-NEXT:    ret
538;
539; CHECKBE-LABEL: test_shuf15:
540; CHECKBE:       // %bb.0:
541; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
542; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
543; CHECKBE-NEXT:    adrp x8, .LCPI19_0
544; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI19_0
545; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
546; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
547; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
548; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
549; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
550; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
551; CHECKBE-NEXT:    ret
552{
553  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 7, i32 2, i32 0, i32 3, i32 2, i32 15>
554  ret <8 x half> %r
555}
556
; extract_shuffle: moves the high four i16 lanes of %j (4..7) into the low
; lanes with one shuffle, extracts those four lanes with a second shuffle,
; zero-extends them to i32 and shifts each left by 3. The little-endian
; expected output is a single ushll2 with shift #3. %k is not used.
557define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) {
558; CHECKLE-LABEL: extract_shuffle:
559; CHECKLE:       // %bb.0:
560; CHECKLE-NEXT:    ushll2 v0.4s, v0.8h, #3
561; CHECKLE-NEXT:    ret
562;
563; CHECKBE-LABEL: extract_shuffle:
564; CHECKBE:       // %bb.0:
565; CHECKBE-NEXT:    rev64 v0.8h, v0.8h
566; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
567; CHECKBE-NEXT:    ushll2 v0.4s, v0.8h, #3
568; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
569; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
570; CHECKBE-NEXT:    ret
571  %a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
572  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
573  %c = zext <4 x i16> %b to <4 x i32>
574  %d = shl <4 x i32> %c, <i32 3, i32 3, i32 3, i32 3>
575  ret <4 x i32> %d
576}
577
578