; xref: /llvm-project/llvm/test/CodeGen/AArch64/concat-vector.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; concat1: join two <2 x i8> vectors into one <4 x i8> (lanes A0, A1, B0, B1)
; with an identity shuffle mask spanning both operands.
; Expected code: the SelectionDAG run folds this into a single uzp1 on .4h
; lanes; the GlobalISel run assembles the result with per-lane moves.
define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
; CHECK-SD-LABEL: concat1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, v0.s[1]
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov w9, v1.s[1]
; CHECK-GI-NEXT:    mov v0.h[1], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.h[2], w8
; CHECK-GI-NEXT:    mov v0.h[3], w9
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i8> %v4i8
}
; concat2: join two <4 x i8> vectors into one <8 x i8> with an identity
; shuffle mask over both operands.
; Expected code: one uzp1 on .8b lanes for the SelectionDAG run; the
; GlobalISel run narrows each operand with uzp1 and then inserts the two
; 32-bit halves into s-lanes 0 and 1.
define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
; CHECK-SD-LABEL: concat2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v8i8 = shufflevector <4 x i8> %A, <4 x i8> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %v8i8
}
; concat3: join two 64-bit <8 x i8> vectors into one 128-bit <16 x i8>.
; Both instruction selectors share the same expectation: %B's low 64 bits are
; inserted into the upper d-lane of %A's register with a single lane move.
define <16 x i8> @concat3(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: concat3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v16i8 = shufflevector <8 x i8> %A, <8 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %v16i8
}
; concat4: join two <2 x i16> vectors into one <4 x i16> with an identity
; shuffle mask over both operands.
; Expected code: one uzp1 on .4h lanes for the SelectionDAG run; the
; GlobalISel run narrows each operand with uzp1 and then inserts the two
; 32-bit halves into s-lanes 0 and 1.
define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
; CHECK-SD-LABEL: concat4:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat4:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v4i16 = shufflevector <2 x i16> %A, <2 x i16> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %v4i16
}
; concat5: join two 64-bit <4 x i16> vectors into one 128-bit <8 x i16>.
; Both instruction selectors are expected to emit a single d-lane insert of
; %B into the upper half of %A's register.
define <8 x i16> @concat5(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: concat5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v8i16 = shufflevector <4 x i16> %A, <4 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %v8i16
}
; concat6: load two <8 x i16> vectors from %A and %B and join them into a
; 256-bit <16 x i16> result.
; Expected code for both selectors: just the two 128-bit loads into q0 and q1
; followed by ret; no shuffle instruction is expected.
define <16 x i16> @concat6(ptr %A, ptr %B) {
; CHECK-LABEL: concat6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %v16i16 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %v16i16
}
; concat7: join two 64-bit <2 x i32> vectors into one 128-bit <4 x i32>.
; Both instruction selectors are expected to emit a single d-lane insert of
; %B into the upper half of %A's register.
define <4 x i32> @concat7(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: concat7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v4i32 = shufflevector <2 x i32> %A, <2 x i32> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %v4i32
}
; concat8: load two <4 x i32> vectors from %A and %B and join them into a
; 256-bit <8 x i32> result.
; Expected code for both selectors: just the two 128-bit loads into q0 and q1
; followed by ret; no shuffle instruction is expected.
define <8 x i32> @concat8(ptr %A, ptr %B) {
; CHECK-LABEL: concat8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %v8i32 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %v8i32
}
; concat9: join two <2 x half> vectors into one <4 x half> with an identity
; shuffle mask over both operands.
; Expected code: a single zip1 on .2s lanes for the SelectionDAG run; the
; GlobalISel run moves each 32-bit half through w8 into s-lanes 0 and 1.
define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
; CHECK-SD-LABEL: concat9:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat9:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v4half = shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %v4half
}
; concat10: join two 64-bit <4 x half> vectors into one 128-bit <8 x half>.
; Both instruction selectors are expected to emit a single d-lane insert of
; %B into the upper half of %A's register.
define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: concat10:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v8half = shufflevector <4 x half> %A, <4 x half> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %v8half
}
; concat11: join two 128-bit <8 x half> arguments into a 256-bit
; <16 x half> result.
; Both instruction selectors are expected to emit nothing but ret, i.e. no
; data movement at all for this concatenation.
define <16 x half> @concat11(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: concat11:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %v16half = shufflevector <8 x half> %A, <8 x half> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x half> %v16half
}
; concat_v8s16_v2s16: widen a loaded <2 x i16> to <8 x i16>; only result
; lanes 0 and 1 are defined, the remaining six mask elements are "don't care".
; The don't-care mask elements are spelled `poison`, matching the later tests
; in this file; a shuffle mask treats `undef` and `poison` elements alike.
; Expected code: a single 32-bit `ldr s0` for the SelectionDAG run; the
; GlobalISel run loads the two halfwords separately and inserts them.
define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
; CHECK-SD-LABEL: concat_v8s16_v2s16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_v8s16_v2s16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
; CHECK-GI-NEXT:    ret
  %a = load <2 x i16>, ptr %ptr
  %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <8 x i16> %b
}
; concat_v16s8_v4s8: widen a loaded <4 x i8> to <16 x i8>; only result lanes
; 0-3 are defined, the remaining twelve mask elements are "don't care".
; The don't-care mask elements are spelled `poison`, matching the later tests
; in this file; a shuffle mask treats `undef` and `poison` elements alike.
; Both instruction selectors are expected to emit a single 32-bit `ldr s0`.
define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) {
; CHECK-LABEL: concat_v16s8_v4s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %ptr
  %b = shufflevector <4 x i8> %a, <4 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <16 x i8> %b
}
; concat_v16s8_v4s8_load: build a <16 x i8> from four separately loaded
; <4 x i8> vectors.
;   %b = A,B in lanes 0-7 (upper lanes don't care)
;   %c = C,D in lanes 0-7 (upper lanes don't care)
;   %d = low 8 lanes of %b followed by low 8 lanes of %c  ->  A,B,C,D
; The don't-care mask elements are spelled `poison`, matching the later tests
; in this file; a shuffle mask treats `undef` and `poison` elements alike.
; Both instruction selectors are expected to emit one `ldr s0` plus three
; single-lane ld1 loads into s-lanes 1..3.
define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %ptrD) {
; CHECK-LABEL: concat_v16s8_v4s8_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    ld1 { v0.s }[2], [x2]
; CHECK-NEXT:    ld1 { v0.s }[3], [x3]
; CHECK-NEXT:    ret
  %A = load <4 x i8>, ptr %ptrA
  %B = load <4 x i8>, ptr %ptrB
  %C = load <4 x i8>, ptr %ptrC
  %D = load <4 x i8>, ptr %ptrD
  %b = shufflevector <4 x i8> %A, <4 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %c = shufflevector <4 x i8> %C, <4 x i8> %D, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %d = shufflevector <16 x i8> %b, <16 x i8> %c, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %d
}
; concat_v16s8_v4s8_reg: same A,B,C,D concatenation as the load variant, but
; the four <4 x i8> pieces arrive as register arguments.
;   %b = A,B in lanes 0-7; %c = C,D in lanes 0-7; %d = low(%b) ++ low(%c)
; The don't-care mask elements are spelled `poison`, matching the later tests
; in this file; a shuffle mask treats `undef` and `poison` elements alike.
; Expected code: the SelectionDAG run pairs the inputs with two d-lane
; inserts and narrows once with uzp1 on .16b; the GlobalISel run narrows each
; input with uzp1 on .8b and inserts the four 32-bit pieces lane by lane.
define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <4 x i8> %D) {
; CHECK-SD-LABEL: concat_v16s8_v4s8_reg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v2.d[1], v3.d[0]
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_v16s8_v4s8_reg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    uzp1 v2.8b, v2.8b, v0.8b
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    uzp1 v1.8b, v3.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    mov v0.s[2], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[3], w8
; CHECK-GI-NEXT:    ret
  %b = shufflevector <4 x i8> %A, <4 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %c = shufflevector <4 x i8> %C, <4 x i8> %D, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %d = shufflevector <16 x i8> %b, <16 x i8> %c, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %d
}
; concat_v8s16_v2s16_reg: build an <8 x i16> from four <2 x i16> register
; arguments.
;   %b = A,B in lanes 0-3; %c = C,D in lanes 0-3; %d = low(%b) ++ low(%c)
; The don't-care mask elements are spelled `poison`, matching the later tests
; in this file; a shuffle mask treats `undef` and `poison` elements alike.
; Expected code: the SelectionDAG run uses a four-register tbl driven by a
; constant-pool index vector; the GlobalISel run narrows each input with uzp1
; on .4h and inserts the four 32-bit pieces lane by lane.
; NOTE(review): the constant-pool label .LCPI15_0 appears to encode this
; function's position in the file, so adding or removing functions above it
; would require regenerating the assertions.
define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %C, <2 x i16> %D) {
; CHECK-SD-LABEL: concat_v8s16_v2s16_reg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_v8s16_v2s16_reg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    uzp1 v2.4h, v2.4h, v0.4h
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    uzp1 v1.4h, v3.4h, v0.4h
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    mov v0.s[2], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[3], w8
; CHECK-GI-NEXT:    ret
  %b = shufflevector <2 x i16> %A, <2 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  %c = shufflevector <2 x i16> %C, <2 x i16> %D, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  %d = shufflevector <8 x i16> %b, <8 x i16> %c, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %d
}
; concat_undef_first_use_first: the final shuffle leaves result lanes 0-1 as
; poison and fills lanes 2-3 from lanes 0-1 of %ext1 (the first operand).
; Expected code: the SelectionDAG run uses a single replicating 32-bit load
; (ld1r on .2s) from x0; the GlobalISel run loads the two halfwords and
; inserts them into s-lane 1.
; NOTE(review): %l2 is loaded but never used, and %ext2 is built from %l1
; rather than %l2 - possibly a copy/paste slip. %ext2 is not selected by the
; mask here, so the expected output does not depend on it.
define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: concat_undef_first_use_first:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_undef_first_use_first:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %l1 = load <2 x i16>, ptr %p1
  %l2 = load <2 x i16>, ptr %p2
  %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
  ret <4 x i16> %t
}
; concat_undef_first_use_second: the final shuffle leaves result lanes 0-1 as
; poison and fills lanes 2-3 from lanes 0-1 of %ext2 (mask indices 8 and 9
; select from the second operand).
; Expected code: the SelectionDAG run uses a single replicating 32-bit load
; (ld1r on .2s) from x0; the GlobalISel run loads the two halfwords and
; inserts them into s-lane 1.
; NOTE(review): %l2 is loaded but never used, and %ext2 is built from %l1
; rather than %l2 - possibly a copy/paste slip. As written, the selected data
; still comes from %p1 (x0), which is what the expected output reflects;
; switching %ext2 to %l2 would require regenerating the assertions.
define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: concat_undef_first_use_second:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_undef_first_use_second:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %l1 = load <2 x i16>, ptr %p1
  %l2 = load <2 x i16>, ptr %p2
  %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 8, i32 9>
  ret <4 x i16> %t
}
; concat_undef_first_use_undef: the final shuffle leaves result lanes 0-1 as
; poison and takes lanes 2-3 from lanes 2-3 of %ext1, which are themselves
; poison (only lanes 0-1 of %ext1 were populated from %l1).
; Expected code: the SelectionDAG run still emits a 32-bit `ldr s0` from x0;
; the GlobalISel run emits nothing but ret.
; NOTE(review): %l2 is loaded but never used, and %ext2 is built from %l1
; rather than %l2 - possibly a copy/paste slip. %ext2 is not selected by the
; mask here, so the expected output does not depend on it.
define <4 x i16> @concat_undef_first_use_undef(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: concat_undef_first_use_undef:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_undef_first_use_undef:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ret
  %l1 = load <2 x i16>, ptr %p1
  %l2 = load <2 x i16>, ptr %p2
  %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3>
  ret <4 x i16> %t
}
; concat_low_low_v8i16: extract the low halves (lanes 0-3) of %a_vec and
; %b_vec and concatenate them: result = low(a) ++ low(b).
; Both instruction selectors are expected to emit a single d-lane insert of
; %b_vec's low half into the upper half of %a_vec's register.
define <8 x i16> @concat_low_low_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-LABEL: concat_low_low_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}
; concat_high_low_v8i16: result = high(a) ++ low(b), i.e. lanes 4-7 of
; %a_vec followed by lanes 0-3 of %b_vec.
; Expected code: the SelectionDAG run folds this into one `ext ... #8`; the
; GlobalISel run first moves a's high half down, then inserts b's low half
; into the upper d-lane.
define <8 x i16> @concat_high_low_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-SD-LABEL: concat_high_low_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_low_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d0, v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}
; concat_low_high_v8i16: result = low(a) ++ high(b), i.e. lanes 0-3 of
; %a_vec followed by lanes 4-7 of %b_vec.
; Expected code: both selectors first bring b's high half down to d-lane 0
; (`ext ... #8` for the SelectionDAG run, `mov d1, v1.d[1]` for the
; GlobalISel run) and then insert it into the upper d-lane of a's register.
define <8 x i16> @concat_low_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-SD-LABEL: concat_low_high_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_low_high_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d1, v1.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}
; concat_high_high_v8i16: result = high(a) ++ high(b), i.e. lanes 4-7 of each
; input.
; Both instruction selectors are expected to write a's high half into the low
; d-lane of b's register (whose high half is already in place) and then copy
; that register to v0.
define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-LABEL: concat_high_high_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}
; concat_high_high_v8f16: half-precision variant of the high ++ high case;
; result = lanes 4-7 of %a_vec followed by lanes 4-7 of %b_vec.
; Both instruction selectors are expected to write a's high half into the low
; d-lane of b's register and then copy that register to v0.
define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
; CHECK-LABEL: concat_high_high_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x half> %shuffle.i3, <4 x half> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %shuffle.i4
}
; concat_high_high_v8bf16: bfloat variant of the high ++ high case;
; result = lanes 4-7 of %a_vec followed by lanes 4-7 of %b_vec.
; Both instruction selectors are expected to write a's high half into the low
; d-lane of b's register and then copy that register to v0.
define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
; CHECK-LABEL: concat_high_high_v8bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x bfloat> %shuffle.i3, <4 x bfloat> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %shuffle.i4
}
; concat_high_high_v4i32: result = high(a) ++ high(b), i.e. lanes 2-3 of each
; <4 x i32> input.
; Expected code: the SelectionDAG run folds this into a single zip2 on .2d;
; the GlobalISel run writes a's high half into the low d-lane of b's register
; and then copies that register to v0.
define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v4i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v4i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i = shufflevector <4 x i32> %b_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i4 = shufflevector <2 x i32> %shuffle.i3, <2 x i32> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %shuffle.i4
}
; concat_high_high_v4f32: single-precision variant of the high ++ high case;
; result = lanes 2-3 of %a_vec followed by lanes 2-3 of %b_vec.
; Expected code: the SelectionDAG run folds this into a single zip2 on .2d;
; the GlobalISel run writes a's high half into the low d-lane of b's register
; and then copies that register to v0.
define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v4f32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v4f32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i = shufflevector <4 x float> %b_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i4 = shufflevector <2 x float> %shuffle.i3, <2 x float> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i4
}
; concat_high_high_v16i8: byte-element variant of the high ++ high case;
; result = lanes 8-15 of %a_vec followed by lanes 8-15 of %b_vec.
; Both instruction selectors are expected to write a's high half into the low
; d-lane of b's register and then copy that register to v0.
define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
; CHECK-LABEL: concat_high_high_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i4 = shufflevector <8 x i8> %shuffle.i3, <8 x i8> %shuffle.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i4
}
; concat_high_high_v2i64: result = <a[1], b[1]>, built by extracting lane 1
; of each <2 x i64> input as a <1 x i64> and concatenating the two.
; Expected code: the SelectionDAG run folds this into a single zip2 on .2d;
; the GlobalISel run uses two d-lane moves within/into v0.
define <2 x i64> @concat_high_high_v2i64(<2 x i64> %a_vec, <2 x i64> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v0.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <2 x i64> %a_vec, <2 x i64> poison, <1 x i32> <i32 1>
  %shuffle.i = shufflevector <2 x i64> %b_vec, <2 x i64> poison, <1 x i32> <i32 1>
  %shuffle.i4 = shufflevector <1 x i64> %shuffle.i3, <1 x i64> %shuffle.i, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuffle.i4
}
; concat_high_high_v2f64: double-precision variant; result = <a[1], b[1]>,
; built by extracting lane 1 of each <2 x double> input as a <1 x double> and
; concatenating the two.
; Expected code: the SelectionDAG run folds this into a single zip2 on .2d;
; the GlobalISel run uses two d-lane moves within/into v0.
define <2 x double> @concat_high_high_v2f64(<2 x double> %a_vec, <2 x double> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v2f64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v2f64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v0.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <2 x double> %a_vec, <2 x double> poison, <1 x i32> <i32 1>
  %shuffle.i = shufflevector <2 x double> %b_vec, <2 x double> poison, <1 x i32> <i32 1>
  %shuffle.i4 = shufflevector <1 x double> %shuffle.i3, <1 x double> %shuffle.i, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i4
}