; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm < %s | FileCheck %s

;
; TRN1Q
;

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: trn1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: trn1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1q.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: trn1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1q.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: trn1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1q.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: trn1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1q.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: trn1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1q.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: trn1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1q.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2Q
;

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: trn2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: trn2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2q.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: trn2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2q.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: trn2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2q.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: trn2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2q.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: trn2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2q.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: trn2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2q.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP1Q
;

define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: uzp1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: uzp1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1q.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: uzp1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1q.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: uzp1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1q.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: uzp1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1q.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: uzp1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: uzp1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1q.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: uzp1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1q.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP2Q
;

define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: uzp2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: uzp2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2q.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: uzp2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2q.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: uzp2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2q.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: uzp2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2q.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: uzp2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: uzp2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2q.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: uzp2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2q.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP1Q
;

define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: zip1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: zip1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1q.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: zip1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1q.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: zip1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1q.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: zip1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1q.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: zip1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: zip1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1q.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: zip1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1q.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP2Q
;

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: zip2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2q.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: zip2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2q.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: zip2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2q.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: zip2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2q.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: zip2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2q.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2q.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: zip2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2q.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: zip2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2q.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}


declare <vscale x 2 x double> @llvm.aarch64.sve.trn1q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.trn2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.zip1q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.zip2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+f64mm,+bf16" }