xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK
3; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6target triple = "aarch64-unknown-linux-gnu"
7
8;
9; i8
10;
11
; Concatenates two <4 x i8> arguments into one <8 x i8> (identity shuffle mask 0..7).
; Expected: SVE/SME runs use lane moves plus ZIP1; the NONEON-NOSVE run copies bytes via the stack.
12define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
13; CHECK-LABEL: concat_v8i8:
14; CHECK:       // %bb.0:
15; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
16; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
17; CHECK-NEXT:    mov z2.h, z1.h[3]
18; CHECK-NEXT:    mov z3.h, z1.h[2]
19; CHECK-NEXT:    mov z4.h, z1.h[1]
20; CHECK-NEXT:    mov z5.h, z0.h[3]
21; CHECK-NEXT:    mov z6.h, z0.h[2]
22; CHECK-NEXT:    mov z7.h, z0.h[1]
23; CHECK-NEXT:    zip1 z2.b, z3.b, z2.b
24; CHECK-NEXT:    zip1 z1.b, z1.b, z4.b
25; CHECK-NEXT:    zip1 z3.b, z6.b, z5.b
26; CHECK-NEXT:    zip1 z0.b, z0.b, z7.b
27; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
28; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
29; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
30; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
31; CHECK-NEXT:    ret
32;
33; NONEON-NOSVE-LABEL: concat_v8i8:
34; NONEON-NOSVE:       // %bb.0:
35; NONEON-NOSVE-NEXT:    sub sp, sp, #32
36; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
37; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
38; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
39; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
40; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
41; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
42; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
43; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
44; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
45; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
46; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
47; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
48; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
49; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
50; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
51; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
52; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
53; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
54; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
55; NONEON-NOSVE-NEXT:    add sp, sp, #32
56; NONEON-NOSVE-NEXT:    ret
57  %res = shufflevector <4 x i8> %op1, <4 x i8> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
58  ret <8 x i8> %res
59}
60
; Concatenates two <8 x i8> arguments into one <16 x i8> (identity shuffle mask 0..15).
; Expected: SVE/SME runs use a predicated SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
61define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2)  {
62; CHECK-LABEL: concat_v16i8:
63; CHECK:       // %bb.0:
64; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
65; CHECK-NEXT:    ptrue p0.b, vl8
66; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
67; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
68; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
69; CHECK-NEXT:    ret
70;
71; NONEON-NOSVE-LABEL: concat_v16i8:
72; NONEON-NOSVE:       // %bb.0:
73; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
74; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
75; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
76; NONEON-NOSVE-NEXT:    ret
77  %res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
78                                                                 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
79  ret <16 x i8> %res
80}
81
; Loads <16 x i8> from %a and %b, concatenates them and stores the <32 x i8> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
82define void @concat_v32i8(ptr %a, ptr %b, ptr %c)  {
83; CHECK-LABEL: concat_v32i8:
84; CHECK:       // %bb.0:
85; CHECK-NEXT:    ldr q0, [x1]
86; CHECK-NEXT:    ldr q1, [x0]
87; CHECK-NEXT:    stp q1, q0, [x2]
88; CHECK-NEXT:    ret
89;
90; NONEON-NOSVE-LABEL: concat_v32i8:
91; NONEON-NOSVE:       // %bb.0:
92; NONEON-NOSVE-NEXT:    ldr q0, [x1]
93; NONEON-NOSVE-NEXT:    ldr q1, [x0]
94; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
95; NONEON-NOSVE-NEXT:    ret
96  %op1 = load <16 x i8>, ptr %a
97  %op2 = load <16 x i8>, ptr %b
98  %res = shufflevector <16 x i8> %op1, <16 x i8> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
99                                                                   i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
100                                                                   i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
101                                                                   i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
102  store <32 x i8> %res, ptr %c
103  ret void
104}
105
; Loads <32 x i8> from %a and %b, concatenates them and stores the <64 x i8> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
106define void @concat_v64i8(ptr %a, ptr %b, ptr %c) {
107; CHECK-LABEL: concat_v64i8:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    ldp q0, q1, [x1]
110; CHECK-NEXT:    ldp q3, q2, [x0]
111; CHECK-NEXT:    stp q0, q1, [x2, #32]
112; CHECK-NEXT:    stp q3, q2, [x2]
113; CHECK-NEXT:    ret
114;
115; NONEON-NOSVE-LABEL: concat_v64i8:
116; NONEON-NOSVE:       // %bb.0:
117; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
118; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
119; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
120; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
121; NONEON-NOSVE-NEXT:    ret
122  %op1 = load <32 x i8>, ptr %a
123  %op2 = load <32 x i8>, ptr %b
124  %res = shufflevector <32 x i8> %op1, <32 x i8> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
125                                                                   i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
126                                                                   i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
127                                                                   i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
128                                                                   i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39,
129                                                                   i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
130                                                                   i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
131                                                                   i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
132  store <64 x i8> %res, ptr %c
133  ret void
134}
135
136;
137; i16
138;
139
; Concatenates two <2 x i16> arguments into one <4 x i16> (identity shuffle mask 0..3).
; Expected: SVE/SME runs use lane moves plus ZIP1; the NONEON-NOSVE run copies halfwords via the stack.
140define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
141; CHECK-LABEL: concat_v4i16:
142; CHECK:       // %bb.0:
143; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
144; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
145; CHECK-NEXT:    mov z2.s, z1.s[1]
146; CHECK-NEXT:    mov z3.s, z0.s[1]
147; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
148; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
149; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
150; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
151; CHECK-NEXT:    ret
152;
153; NONEON-NOSVE-LABEL: concat_v4i16:
154; NONEON-NOSVE:       // %bb.0:
155; NONEON-NOSVE-NEXT:    sub sp, sp, #32
156; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
157; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
158; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
159; NONEON-NOSVE-NEXT:    strh w9, [sp, #30]
160; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
161; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
162; NONEON-NOSVE-NEXT:    strh w9, [sp, #26]
163; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
164; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
165; NONEON-NOSVE-NEXT:    add sp, sp, #32
166; NONEON-NOSVE-NEXT:    ret
167  %res = shufflevector <2 x i16> %op1, <2 x i16> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168  ret <4 x i16> %res
169}
170
171; Concatenates two <4 x i16> arguments into one 128-bit <8 x i16>.
; Expected: SVE/SME runs use a predicated SVE SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
172define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2)  {
173; CHECK-LABEL: concat_v8i16:
174; CHECK:       // %bb.0:
175; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
176; CHECK-NEXT:    ptrue p0.h, vl4
177; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
178; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
179; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
180; CHECK-NEXT:    ret
181;
182; NONEON-NOSVE-LABEL: concat_v8i16:
183; NONEON-NOSVE:       // %bb.0:
184; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
185; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
186; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
187; NONEON-NOSVE-NEXT:    ret
188  %res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
189  ret <8 x i16> %res
190}
191
; Loads <8 x i16> from %a and %b, concatenates them and stores the <16 x i16> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
192define void @concat_v16i16(ptr %a, ptr %b, ptr %c)  {
193; CHECK-LABEL: concat_v16i16:
194; CHECK:       // %bb.0:
195; CHECK-NEXT:    ldr q0, [x1]
196; CHECK-NEXT:    ldr q1, [x0]
197; CHECK-NEXT:    stp q1, q0, [x2]
198; CHECK-NEXT:    ret
199;
200; NONEON-NOSVE-LABEL: concat_v16i16:
201; NONEON-NOSVE:       // %bb.0:
202; NONEON-NOSVE-NEXT:    ldr q0, [x1]
203; NONEON-NOSVE-NEXT:    ldr q1, [x0]
204; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
205; NONEON-NOSVE-NEXT:    ret
206  %op1 = load <8 x i16>, ptr %a
207  %op2 = load <8 x i16>, ptr %b
208  %res = shufflevector <8 x i16> %op1, <8 x i16> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
209                                                                   i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
210  store <16 x i16> %res, ptr %c
211  ret void
212}
213
; Loads <16 x i16> from %a and %b, concatenates them and stores the <32 x i16> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
214define void @concat_v32i16(ptr %a, ptr %b, ptr %c) {
215; CHECK-LABEL: concat_v32i16:
216; CHECK:       // %bb.0:
217; CHECK-NEXT:    ldp q0, q1, [x1]
218; CHECK-NEXT:    ldp q3, q2, [x0]
219; CHECK-NEXT:    stp q0, q1, [x2, #32]
220; CHECK-NEXT:    stp q3, q2, [x2]
221; CHECK-NEXT:    ret
222;
223; NONEON-NOSVE-LABEL: concat_v32i16:
224; NONEON-NOSVE:       // %bb.0:
225; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
226; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
227; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
228; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
229; NONEON-NOSVE-NEXT:    ret
230  %op1 = load <16 x i16>, ptr %a
231  %op2 = load <16 x i16>, ptr %b
232  %res = shufflevector <16 x i16> %op1, <16 x i16> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
233                                                                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
234                                                                     i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
235                                                                     i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
236  store <32 x i16> %res, ptr %c
237  ret void
238}
239
240;
241; i32
242;
243
244; Concatenates two <1 x i32> arguments into one 64-bit <2 x i32>.
; Expected: SVE/SME runs use a single SVE ZIP1; the NONEON-NOSVE run rebuilds the vector via the stack.
245define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
246; CHECK-LABEL: concat_v2i32:
247; CHECK:       // %bb.0:
248; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
249; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
250; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
251; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
252; CHECK-NEXT:    ret
253;
254; NONEON-NOSVE-LABEL: concat_v2i32:
255; NONEON-NOSVE:       // %bb.0:
256; NONEON-NOSVE-NEXT:    sub sp, sp, #32
257; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
258; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
259; NONEON-NOSVE-NEXT:    ldr w9, [sp, #16]
260; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
261; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
262; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
263; NONEON-NOSVE-NEXT:    add sp, sp, #32
264; NONEON-NOSVE-NEXT:    ret
265  %res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> <i32 0, i32 1>
266  ret <2 x i32> %res
267}
268
269; Concatenates two <2 x i32> arguments into one 128-bit <4 x i32>.
; Expected: SVE/SME runs use a predicated SVE SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
270define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2)  {
271; CHECK-LABEL: concat_v4i32:
272; CHECK:       // %bb.0:
273; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
274; CHECK-NEXT:    ptrue p0.s, vl2
275; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
276; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
277; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
278; CHECK-NEXT:    ret
279;
280; NONEON-NOSVE-LABEL: concat_v4i32:
281; NONEON-NOSVE:       // %bb.0:
282; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
283; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
284; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
285; NONEON-NOSVE-NEXT:    ret
286  %res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
287  ret <4 x i32> %res
288}
289
; Loads <4 x i32> from %a and %b, concatenates them and stores the <8 x i32> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
290define void @concat_v8i32(ptr %a, ptr %b, ptr %c)  {
291; CHECK-LABEL: concat_v8i32:
292; CHECK:       // %bb.0:
293; CHECK-NEXT:    ldr q0, [x1]
294; CHECK-NEXT:    ldr q1, [x0]
295; CHECK-NEXT:    stp q1, q0, [x2]
296; CHECK-NEXT:    ret
297;
298; NONEON-NOSVE-LABEL: concat_v8i32:
299; NONEON-NOSVE:       // %bb.0:
300; NONEON-NOSVE-NEXT:    ldr q0, [x1]
301; NONEON-NOSVE-NEXT:    ldr q1, [x0]
302; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
303; NONEON-NOSVE-NEXT:    ret
304  %op1 = load <4 x i32>, ptr %a
305  %op2 = load <4 x i32>, ptr %b
306  %res = shufflevector <4 x i32> %op1, <4 x i32> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
307  store <8 x i32> %res, ptr %c
308  ret void
309}
310
; Loads <8 x i32> from %a and %b, concatenates them and stores the <16 x i32> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
311define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
312; CHECK-LABEL: concat_v16i32:
313; CHECK:       // %bb.0:
314; CHECK-NEXT:    ldp q0, q1, [x1]
315; CHECK-NEXT:    ldp q3, q2, [x0]
316; CHECK-NEXT:    stp q0, q1, [x2, #32]
317; CHECK-NEXT:    stp q3, q2, [x2]
318; CHECK-NEXT:    ret
319;
320; NONEON-NOSVE-LABEL: concat_v16i32:
321; NONEON-NOSVE:       // %bb.0:
322; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
323; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
324; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
325; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
326; NONEON-NOSVE-NEXT:    ret
327  %op1 = load <8 x i32>, ptr %a
328  %op2 = load <8 x i32>, ptr %b
329  %res = shufflevector <8 x i32> %op1, <8 x i32> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
330                                                                   i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
331  store <16 x i32> %res, ptr %c
332  ret void
333}
334
335;
336; i64
337;
338
339; Concatenates two <1 x i64> arguments into one 128-bit <2 x i64>.
; Expected: SVE/SME runs use a predicated SVE SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
340define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2)  {
341; CHECK-LABEL: concat_v2i64:
342; CHECK:       // %bb.0:
343; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
344; CHECK-NEXT:    ptrue p0.d, vl1
345; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
346; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
347; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
348; CHECK-NEXT:    ret
349;
350; NONEON-NOSVE-LABEL: concat_v2i64:
351; NONEON-NOSVE:       // %bb.0:
352; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
353; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
354; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
355; NONEON-NOSVE-NEXT:    ret
356  %res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> <i32 0, i32 1>
357  ret <2 x i64> %res
358}
359
; Loads <2 x i64> from %a and %b, concatenates them and stores the <4 x i64> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
360define void @concat_v4i64(ptr %a, ptr %b, ptr %c)  {
361; CHECK-LABEL: concat_v4i64:
362; CHECK:       // %bb.0:
363; CHECK-NEXT:    ldr q0, [x1]
364; CHECK-NEXT:    ldr q1, [x0]
365; CHECK-NEXT:    stp q1, q0, [x2]
366; CHECK-NEXT:    ret
367;
368; NONEON-NOSVE-LABEL: concat_v4i64:
369; NONEON-NOSVE:       // %bb.0:
370; NONEON-NOSVE-NEXT:    ldr q0, [x1]
371; NONEON-NOSVE-NEXT:    ldr q1, [x0]
372; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
373; NONEON-NOSVE-NEXT:    ret
374  %op1 = load <2 x i64>, ptr %a
375  %op2 = load <2 x i64>, ptr %b
376  %res = shufflevector <2 x i64> %op1, <2 x i64> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
377  store <4 x i64> %res, ptr %c
378  ret void
379}
380
; Loads <4 x i64> from %a and %b, concatenates them and stores the <8 x i64> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
381define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
382; CHECK-LABEL: concat_v8i64:
383; CHECK:       // %bb.0:
384; CHECK-NEXT:    ldp q0, q1, [x1]
385; CHECK-NEXT:    ldp q3, q2, [x0]
386; CHECK-NEXT:    stp q0, q1, [x2, #32]
387; CHECK-NEXT:    stp q3, q2, [x2]
388; CHECK-NEXT:    ret
389;
390; NONEON-NOSVE-LABEL: concat_v8i64:
391; NONEON-NOSVE:       // %bb.0:
392; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
393; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
394; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
395; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
396; NONEON-NOSVE-NEXT:    ret
397  %op1 = load <4 x i64>, ptr %a
398  %op2 = load <4 x i64>, ptr %b
399  %res = shufflevector <4 x i64> %op1, <4 x i64> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
400  store <8 x i64> %res, ptr %c
401  ret void
402}
403
404;
405; f16
406;
407
; Concatenates two <2 x half> arguments into one <4 x half> (identity shuffle mask 0..3).
; Expected: SVE/SME runs use a single ZIP1 on .s lanes; the NONEON-NOSVE run rebuilds the vector via the stack.
408define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
409; CHECK-LABEL: concat_v4f16:
410; CHECK:       // %bb.0:
411; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
412; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
413; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
414; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
415; CHECK-NEXT:    ret
416;
417; NONEON-NOSVE-LABEL: concat_v4f16:
418; NONEON-NOSVE:       // %bb.0:
419; NONEON-NOSVE-NEXT:    sub sp, sp, #32
420; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
421; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
422; NONEON-NOSVE-NEXT:    ldr w9, [sp, #16]
423; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
424; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
425; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
426; NONEON-NOSVE-NEXT:    add sp, sp, #32
427; NONEON-NOSVE-NEXT:    ret
428  %res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
429  ret <4 x half> %res
430}
431
; Concatenates two <4 x half> arguments into one <8 x half> (identity shuffle mask 0..7).
; Expected: SVE/SME runs use a predicated SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
432define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2)  {
433; CHECK-LABEL: concat_v8f16:
434; CHECK:       // %bb.0:
435; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
436; CHECK-NEXT:    ptrue p0.h, vl4
437; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
438; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
439; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
440; CHECK-NEXT:    ret
441;
442; NONEON-NOSVE-LABEL: concat_v8f16:
443; NONEON-NOSVE:       // %bb.0:
444; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
445; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
446; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
447; NONEON-NOSVE-NEXT:    ret
448  %res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
449  ret <8 x half> %res
450}
451
; Loads <8 x half> from %a and %b, concatenates them and stores the <16 x half> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
452define void @concat_v16f16(ptr %a, ptr %b, ptr %c)  {
453; CHECK-LABEL: concat_v16f16:
454; CHECK:       // %bb.0:
455; CHECK-NEXT:    ldr q0, [x1]
456; CHECK-NEXT:    ldr q1, [x0]
457; CHECK-NEXT:    stp q1, q0, [x2]
458; CHECK-NEXT:    ret
459;
460; NONEON-NOSVE-LABEL: concat_v16f16:
461; NONEON-NOSVE:       // %bb.0:
462; NONEON-NOSVE-NEXT:    ldr q0, [x1]
463; NONEON-NOSVE-NEXT:    ldr q1, [x0]
464; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
465; NONEON-NOSVE-NEXT:    ret
466  %op1 = load <8 x half>, ptr %a
467  %op2 = load <8 x half>, ptr %b
468  %res = shufflevector <8 x half> %op1, <8 x half> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
469                                                                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
470  store <16 x half> %res, ptr %c
471  ret void
472}
473
; Loads <16 x half> from %a and %b, concatenates them and stores the <32 x half> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
474define void @concat_v32f16(ptr %a, ptr %b, ptr %c) {
475; CHECK-LABEL: concat_v32f16:
476; CHECK:       // %bb.0:
477; CHECK-NEXT:    ldp q0, q1, [x1]
478; CHECK-NEXT:    ldp q3, q2, [x0]
479; CHECK-NEXT:    stp q0, q1, [x2, #32]
480; CHECK-NEXT:    stp q3, q2, [x2]
481; CHECK-NEXT:    ret
482;
483; NONEON-NOSVE-LABEL: concat_v32f16:
484; NONEON-NOSVE:       // %bb.0:
485; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
486; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
487; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
488; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
489; NONEON-NOSVE-NEXT:    ret
490  %op1 = load <16 x half>, ptr %a
491  %op2 = load <16 x half>, ptr %b
492  %res = shufflevector <16 x half> %op1, <16 x half> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
493                                                                       i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
494                                                                       i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
495                                                                       i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
496  store <32 x half> %res, ptr %c
497  ret void
498}
499
500;
501; f32
502;
503
504; Concatenates two <1 x float> arguments into one 64-bit <2 x float>.
; Expected: SVE/SME runs use a single SVE ZIP1; the NONEON-NOSVE run rebuilds the vector via the stack.
505define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
506; CHECK-LABEL: concat_v2f32:
507; CHECK:       // %bb.0:
508; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
509; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
510; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
511; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
512; CHECK-NEXT:    ret
513;
514; NONEON-NOSVE-LABEL: concat_v2f32:
515; NONEON-NOSVE:       // %bb.0:
516; NONEON-NOSVE-NEXT:    sub sp, sp, #32
517; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
518; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
519; NONEON-NOSVE-NEXT:    ldr w9, [sp, #16]
520; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
521; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
522; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
523; NONEON-NOSVE-NEXT:    add sp, sp, #32
524; NONEON-NOSVE-NEXT:    ret
525  %res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> <i32 0, i32 1>
526  ret <2 x float> %res
527}
528
529; Concatenates two <2 x float> arguments into one 128-bit <4 x float>.
; Expected: SVE/SME runs use a predicated SVE SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
530define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2)  {
531; CHECK-LABEL: concat_v4f32:
532; CHECK:       // %bb.0:
533; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
534; CHECK-NEXT:    ptrue p0.s, vl2
535; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
536; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
537; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
538; CHECK-NEXT:    ret
539;
540; NONEON-NOSVE-LABEL: concat_v4f32:
541; NONEON-NOSVE:       // %bb.0:
542; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
543; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
544; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
545; NONEON-NOSVE-NEXT:    ret
546  %res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
547  ret <4 x float> %res
548}
549
; Loads <4 x float> from %a and %b, concatenates them and stores the <8 x float> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
550define void @concat_v8f32(ptr %a, ptr %b, ptr %c)  {
551; CHECK-LABEL: concat_v8f32:
552; CHECK:       // %bb.0:
553; CHECK-NEXT:    ldr q0, [x1]
554; CHECK-NEXT:    ldr q1, [x0]
555; CHECK-NEXT:    stp q1, q0, [x2]
556; CHECK-NEXT:    ret
557;
558; NONEON-NOSVE-LABEL: concat_v8f32:
559; NONEON-NOSVE:       // %bb.0:
560; NONEON-NOSVE-NEXT:    ldr q0, [x1]
561; NONEON-NOSVE-NEXT:    ldr q1, [x0]
562; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
563; NONEON-NOSVE-NEXT:    ret
564  %op1 = load <4 x float>, ptr %a
565  %op2 = load <4 x float>, ptr %b
566  %res = shufflevector <4 x float> %op1, <4 x float> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
567  store <8 x float> %res, ptr %c
568  ret void
569}
570
; Loads <8 x float> from %a and %b, concatenates them and stores the <16 x float> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
571define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
572; CHECK-LABEL: concat_v16f32:
573; CHECK:       // %bb.0:
574; CHECK-NEXT:    ldp q0, q1, [x1]
575; CHECK-NEXT:    ldp q3, q2, [x0]
576; CHECK-NEXT:    stp q0, q1, [x2, #32]
577; CHECK-NEXT:    stp q3, q2, [x2]
578; CHECK-NEXT:    ret
579;
580; NONEON-NOSVE-LABEL: concat_v16f32:
581; NONEON-NOSVE:       // %bb.0:
582; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
583; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
584; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
585; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
586; NONEON-NOSVE-NEXT:    ret
587  %op1 = load <8 x float>, ptr %a
588  %op2 = load <8 x float>, ptr %b
589  %res = shufflevector <8 x float> %op1, <8 x float> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
590                                                                       i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
591  store <16 x float> %res, ptr %c
592  ret void
593}
594
595;
596; f64
597;
598
599; Concatenates two <1 x double> arguments into one 128-bit <2 x double>.
; Expected: SVE/SME runs use a predicated SVE SPLICE; the NONEON-NOSVE run pairs d0/d1 on the stack.
600define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2)  {
601; CHECK-LABEL: concat_v2f64:
602; CHECK:       // %bb.0:
603; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
604; CHECK-NEXT:    ptrue p0.d, vl1
605; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
606; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
607; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
608; CHECK-NEXT:    ret
609;
610; NONEON-NOSVE-LABEL: concat_v2f64:
611; NONEON-NOSVE:       // %bb.0:
612; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-16]!
613; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
614; NONEON-NOSVE-NEXT:    ldr q0, [sp], #16
615; NONEON-NOSVE-NEXT:    ret
616  %res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> <i32 0, i32 1>
617  ret <2 x double> %res
618}
619
; Loads <2 x double> from %a and %b, concatenates them and stores the <4 x double> to %c.
; Expected in every run: two Q-register loads and one STP, with no shuffle instructions.
620define void @concat_v4f64(ptr %a, ptr %b, ptr %c)  {
621; CHECK-LABEL: concat_v4f64:
622; CHECK:       // %bb.0:
623; CHECK-NEXT:    ldr q0, [x1]
624; CHECK-NEXT:    ldr q1, [x0]
625; CHECK-NEXT:    stp q1, q0, [x2]
626; CHECK-NEXT:    ret
627;
628; NONEON-NOSVE-LABEL: concat_v4f64:
629; NONEON-NOSVE:       // %bb.0:
630; NONEON-NOSVE-NEXT:    ldr q0, [x1]
631; NONEON-NOSVE-NEXT:    ldr q1, [x0]
632; NONEON-NOSVE-NEXT:    stp q1, q0, [x2]
633; NONEON-NOSVE-NEXT:    ret
634  %op1 = load <2 x double>, ptr %a
635  %op2 = load <2 x double>, ptr %b
636  %res = shufflevector <2 x double> %op1, <2 x double> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
637  store <4 x double> %res, ptr %c
638  ret void
639}
640
; Loads <4 x double> from %a and %b, concatenates them and stores the <8 x double> to %c.
; Expected in every run: two LDPs and two STPs of Q registers (%b's data lands at %c + 32).
641define void @concat_v8f64(ptr %a, ptr %b, ptr %c) {
642; CHECK-LABEL: concat_v8f64:
643; CHECK:       // %bb.0:
644; CHECK-NEXT:    ldp q0, q1, [x1]
645; CHECK-NEXT:    ldp q3, q2, [x0]
646; CHECK-NEXT:    stp q0, q1, [x2, #32]
647; CHECK-NEXT:    stp q3, q2, [x2]
648; CHECK-NEXT:    ret
649;
650; NONEON-NOSVE-LABEL: concat_v8f64:
651; NONEON-NOSVE:       // %bb.0:
652; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
653; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
654; NONEON-NOSVE-NEXT:    stp q0, q1, [x2, #32]
655; NONEON-NOSVE-NEXT:    stp q3, q2, [x2]
656; NONEON-NOSVE-NEXT:    ret
657  %op1 = load <4 x double>, ptr %a
658  %op2 = load <4 x double>, ptr %b
659  %res = shufflevector <4 x double> %op1, <4 x double> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
660  store <8 x double> %res, ptr %c
661  ret void
662}
663
664;
665; undef
666;
667
; Concatenates a loaded <16 x i8> with an undef vector and stores the <32 x i8> to %b.
; Expected in every run: only the defined low 16 bytes are copied (ldr q0 / str q0).
668define void @concat_v32i8_undef(ptr %a, ptr %b)  {
669; CHECK-LABEL: concat_v32i8_undef:
670; CHECK:       // %bb.0:
671; CHECK-NEXT:    ldr q0, [x0]
672; CHECK-NEXT:    str q0, [x1]
673; CHECK-NEXT:    ret
674;
675; NONEON-NOSVE-LABEL: concat_v32i8_undef:
676; NONEON-NOSVE:       // %bb.0:
677; NONEON-NOSVE-NEXT:    ldr q0, [x0]
678; NONEON-NOSVE-NEXT:    str q0, [x1]
679; NONEON-NOSVE-NEXT:    ret
680  %op1 = load <16 x i8>, ptr %a
681  %res = shufflevector <16 x i8> %op1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
682                                                                    i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
683                                                                    i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
684                                                                    i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
685  store <32 x i8> %res, ptr %b
686  ret void
687}
688
; Concatenates a loaded <8 x i16> with an undef vector and stores the <16 x i16> to %b.
; Expected in every run: only the defined low 16 bytes are copied (ldr q0 / str q0).
689define void @concat_v16i16_undef(ptr %a, ptr %b)  {
690; CHECK-LABEL: concat_v16i16_undef:
691; CHECK:       // %bb.0:
692; CHECK-NEXT:    ldr q0, [x0]
693; CHECK-NEXT:    str q0, [x1]
694; CHECK-NEXT:    ret
695;
696; NONEON-NOSVE-LABEL: concat_v16i16_undef:
697; NONEON-NOSVE:       // %bb.0:
698; NONEON-NOSVE-NEXT:    ldr q0, [x0]
699; NONEON-NOSVE-NEXT:    str q0, [x1]
700; NONEON-NOSVE-NEXT:    ret
701  %op1 = load <8 x i16>, ptr %a
702  %res = shufflevector <8 x i16> %op1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
703                                                                    i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
704  store <16 x i16> %res, ptr %b
705  ret void
706}
707
; Concatenates a loaded <4 x i32> with an undef vector and stores the <8 x i32> to %b.
; Expected in every run: only the defined low 16 bytes are copied (ldr q0 / str q0).
708define void @concat_v8i32_undef(ptr %a, ptr %b)  {
709; CHECK-LABEL: concat_v8i32_undef:
710; CHECK:       // %bb.0:
711; CHECK-NEXT:    ldr q0, [x0]
712; CHECK-NEXT:    str q0, [x1]
713; CHECK-NEXT:    ret
714;
715; NONEON-NOSVE-LABEL: concat_v8i32_undef:
716; NONEON-NOSVE:       // %bb.0:
717; NONEON-NOSVE-NEXT:    ldr q0, [x0]
718; NONEON-NOSVE-NEXT:    str q0, [x1]
719; NONEON-NOSVE-NEXT:    ret
720  %op1 = load <4 x i32>, ptr %a
721  %res = shufflevector <4 x i32> %op1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
722  store <8 x i32> %res, ptr %b
723  ret void
724}
725
; Concatenates a loaded <2 x i64> with an undef vector and stores the <4 x i64> to %b.
; Expected in every run: only the defined low 16 bytes are copied (ldr q0 / str q0).
726define void @concat_v4i64_undef(ptr %a, ptr %b)  {
727; CHECK-LABEL: concat_v4i64_undef:
728; CHECK:       // %bb.0:
729; CHECK-NEXT:    ldr q0, [x0]
730; CHECK-NEXT:    str q0, [x1]
731; CHECK-NEXT:    ret
732;
733; NONEON-NOSVE-LABEL: concat_v4i64_undef:
734; NONEON-NOSVE:       // %bb.0:
735; NONEON-NOSVE-NEXT:    ldr q0, [x0]
736; NONEON-NOSVE-NEXT:    str q0, [x1]
737; NONEON-NOSVE-NEXT:    ret
738  %op1 = load <2 x i64>, ptr %a
739  %res = shufflevector <2 x i64> %op1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
740  store <4 x i64> %res, ptr %b
741  ret void
742}
743
744;
745; > 2 operands
746;
747
; Widens a loaded <8 x i8> twice with undef (8 -> 16 -> 32 elements) and stores the result to %b.
; Expected: SVE/SME runs load d0 and store q0; the NONEON-NOSVE run bounces d0 through the stack first.
748define void @concat_v32i8_4op(ptr %a, ptr %b)  {
749; CHECK-LABEL: concat_v32i8_4op:
750; CHECK:       // %bb.0:
751; CHECK-NEXT:    ldr d0, [x0]
752; CHECK-NEXT:    str q0, [x1]
753; CHECK-NEXT:    ret
754;
755; NONEON-NOSVE-LABEL: concat_v32i8_4op:
756; NONEON-NOSVE:       // %bb.0:
757; NONEON-NOSVE-NEXT:    ldr d0, [x0]
758; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
759; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
760; NONEON-NOSVE-NEXT:    ldr q0, [sp]
761; NONEON-NOSVE-NEXT:    str q0, [x1]
762; NONEON-NOSVE-NEXT:    add sp, sp, #16
763; NONEON-NOSVE-NEXT:    ret
764  %op1 = load <8 x i8>, ptr %a
765  %shuffle = shufflevector <8 x i8> %op1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
766                                                                      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
767  %res = shufflevector <16 x i8> %shuffle, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
768                                                                        i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
769                                                                        i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
770                                                                        i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
771  store <32 x i8> %res, ptr %b
772  ret void
773}
774
; Widens a loaded <4 x i16> twice with undef (4 -> 8 -> 16 elements) and stores the result to %b.
; Expected: SVE/SME runs load d0 and store q0; the NONEON-NOSVE run bounces d0 through the stack first.
775define void @concat_v16i16_4op(ptr %a, ptr %b)  {
776; CHECK-LABEL: concat_v16i16_4op:
777; CHECK:       // %bb.0:
778; CHECK-NEXT:    ldr d0, [x0]
779; CHECK-NEXT:    str q0, [x1]
780; CHECK-NEXT:    ret
781;
782; NONEON-NOSVE-LABEL: concat_v16i16_4op:
783; NONEON-NOSVE:       // %bb.0:
784; NONEON-NOSVE-NEXT:    ldr d0, [x0]
785; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
786; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
787; NONEON-NOSVE-NEXT:    ldr q0, [sp]
788; NONEON-NOSVE-NEXT:    str q0, [x1]
789; NONEON-NOSVE-NEXT:    add sp, sp, #16
790; NONEON-NOSVE-NEXT:    ret
791  %op1 = load <4 x i16>, ptr %a
792  %shuffle = shufflevector <4 x i16> %op1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
793  %res = shufflevector <8 x i16> %shuffle, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
794                                                                        i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
795  store <16 x i16> %res, ptr %b
796  ret void
797}
798
; Widens a loaded <2 x i32> twice with undef (2 -> 4 -> 8 elements) and stores the result to %b.
; Expected: SVE/SME runs load d0 and store q0; the NONEON-NOSVE run bounces d0 through the stack first.
799define void @concat_v8i32_4op(ptr %a, ptr %b)  {
800; CHECK-LABEL: concat_v8i32_4op:
801; CHECK:       // %bb.0:
802; CHECK-NEXT:    ldr d0, [x0]
803; CHECK-NEXT:    str q0, [x1]
804; CHECK-NEXT:    ret
805;
806; NONEON-NOSVE-LABEL: concat_v8i32_4op:
807; NONEON-NOSVE:       // %bb.0:
808; NONEON-NOSVE-NEXT:    ldr d0, [x0]
809; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
810; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
811; NONEON-NOSVE-NEXT:    ldr q0, [sp]
812; NONEON-NOSVE-NEXT:    str q0, [x1]
813; NONEON-NOSVE-NEXT:    add sp, sp, #16
814; NONEON-NOSVE-NEXT:    ret
815  %op1 = load <2 x i32>, ptr %a
816  %shuffle = shufflevector <2 x i32> %op1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
817  %res = shufflevector <4 x i32> %shuffle, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
818  store <8 x i32> %res, ptr %b
819  ret void
820}
821
; Widens a loaded <1 x i64> twice with undef (1 -> 2 -> 4 elements) and stores the result to %b.
; Expected: SVE/SME runs load d0 and store q0; the NONEON-NOSVE run bounces d0 through the stack first.
822define void @concat_v4i64_4op(ptr %a, ptr %b)  {
823; CHECK-LABEL: concat_v4i64_4op:
824; CHECK:       // %bb.0:
825; CHECK-NEXT:    ldr d0, [x0]
826; CHECK-NEXT:    str q0, [x1]
827; CHECK-NEXT:    ret
828;
829; NONEON-NOSVE-LABEL: concat_v4i64_4op:
830; NONEON-NOSVE:       // %bb.0:
831; NONEON-NOSVE-NEXT:    ldr d0, [x0]
832; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
833; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
834; NONEON-NOSVE-NEXT:    ldr q0, [sp]
835; NONEON-NOSVE-NEXT:    str q0, [x1]
836; NONEON-NOSVE-NEXT:    add sp, sp, #16
837; NONEON-NOSVE-NEXT:    ret
838  %op1 = load <1 x i64>, ptr %a
839  %shuffle = shufflevector <1 x i64> %op1, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
840  %res = shufflevector <2 x i64> %shuffle, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
841  store <4 x i64> %res, ptr %b
842  ret void
843}
844