xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
4
5;
6; CLASTA (Vectors)
7;
8
9define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
10; CHECK-LABEL: clasta_i8:
11; CHECK:       // %bb.0:
12; CHECK-NEXT:    clasta z0.b, p0, z0.b, z1.b
13; CHECK-NEXT:    ret
14  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg,
15                                                                  <vscale x 16 x i8> %a,
16                                                                  <vscale x 16 x i8> %b)
17  ret <vscale x 16 x i8> %out
18}
19
20define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
21; CHECK-LABEL: clasta_i16:
22; CHECK:       // %bb.0:
23; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
24; CHECK-NEXT:    ret
25  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg,
26                                                                  <vscale x 8 x i16> %a,
27                                                                  <vscale x 8 x i16> %b)
28  ret <vscale x 8 x i16> %out
29}
30
31define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
32; CHECK-LABEL: clasta_i32:
33; CHECK:       // %bb.0:
34; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
35; CHECK-NEXT:    ret
36  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg,
37                                                                  <vscale x 4 x i32> %a,
38                                                                  <vscale x 4 x i32> %b)
39  ret <vscale x 4 x i32> %out
40}
41
42define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
43; CHECK-LABEL: clasta_i64:
44; CHECK:       // %bb.0:
45; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
46; CHECK-NEXT:    ret
47  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg,
48                                                                  <vscale x 2 x i64> %a,
49                                                                  <vscale x 2 x i64> %b)
50  ret <vscale x 2 x i64> %out
51}
52
53define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
54; CHECK-LABEL: clasta_f16:
55; CHECK:       // %bb.0:
56; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
57; CHECK-NEXT:    ret
58  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,
59                                                                   <vscale x 8 x half> %a,
60                                                                   <vscale x 8 x half> %b)
61  ret <vscale x 8 x half> %out
62}
63
64define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
65; CHECK-LABEL: clasta_bf16:
66; CHECK:       // %bb.0:
67; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
68; CHECK-NEXT:    ret
69  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
70                                                                      <vscale x 8 x bfloat> %a,
71                                                                      <vscale x 8 x bfloat> %b)
72  ret <vscale x 8 x bfloat> %out
73}
74
75define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
76; CHECK-LABEL: clasta_f32:
77; CHECK:       // %bb.0:
78; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
79; CHECK-NEXT:    ret
80  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,
81                                                                    <vscale x 4 x float> %a,
82                                                                    <vscale x 4 x float> %b)
83  ret <vscale x 4 x float> %out
84}
85
86define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
87; CHECK-LABEL: clasta_f64:
88; CHECK:       // %bb.0:
89; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
90; CHECK-NEXT:    ret
91  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg,
92                                                                     <vscale x 2 x double> %a,
93                                                                     <vscale x 2 x double> %b)
94  ret <vscale x 2 x double> %out
95}
96
97;
98; CLASTA (Scalar)
99;
100
101define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
102; CHECK-LABEL: clasta_n_i8:
103; CHECK:       // %bb.0:
104; CHECK-NEXT:    clasta w0, p0, w0, z0.b
105; CHECK-NEXT:    ret
106  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg,
107                                                    i8 %a,
108                                                    <vscale x 16 x i8> %b)
109  ret i8 %out
110}
111
112define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
113; CHECK-LABEL: clasta_n_i16:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    clasta w0, p0, w0, z0.h
116; CHECK-NEXT:    ret
117  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg,
118                                                     i16 %a,
119                                                     <vscale x 8 x i16> %b)
120  ret i16 %out
121}
122
123define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
124; CHECK-LABEL: clasta_n_i32:
125; CHECK:       // %bb.0:
126; CHECK-NEXT:    clasta w0, p0, w0, z0.s
127; CHECK-NEXT:    ret
128  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg,
129                                                     i32 %a,
130                                                     <vscale x 4 x i32> %b)
131  ret i32 %out
132}
133
134define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
135; CHECK-LABEL: clasta_n_i64:
136; CHECK:       // %bb.0:
137; CHECK-NEXT:    clasta x0, p0, x0, z0.d
138; CHECK-NEXT:    ret
139  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg,
140                                                     i64 %a,
141                                                     <vscale x 2 x i64> %b)
142  ret i64 %out
143}
144
145define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
146; CHECK-LABEL: clasta_n_f16:
147; CHECK:       // %bb.0:
148; CHECK-NEXT:    clasta h0, p0, h0, z1.h
149; CHECK-NEXT:    ret
150  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,
151                                                      half %a,
152                                                      <vscale x 8 x half> %b)
153  ret half %out
154}
155
156define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
157; CHECK-LABEL: clasta_n_bf16:
158; CHECK:       // %bb.0:
159; CHECK-NEXT:    clasta h0, p0, h0, z1.h
160; CHECK-NEXT:    ret
161  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
162                                                         bfloat %a,
163                                                         <vscale x 8 x bfloat> %b)
164  ret bfloat %out
165}
166
167define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
168; CHECK-LABEL: clasta_n_f32:
169; CHECK:       // %bb.0:
170; CHECK-NEXT:    clasta s0, p0, s0, z1.s
171; CHECK-NEXT:    ret
172  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,
173                                                       float %a,
174                                                       <vscale x 4 x float> %b)
175  ret float %out
176}
177
178define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
179; CHECK-LABEL: clasta_n_f64:
180; CHECK:       // %bb.0:
181; CHECK-NEXT:    clasta d0, p0, d0, z1.d
182; CHECK-NEXT:    ret
183  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg,
184                                                        double %a,
185                                                        <vscale x 2 x double> %b)
186  ret double %out
187}
188
189;
190; CLASTB (Vectors)
191;
192
193define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
194; CHECK-LABEL: clastb_i8:
195; CHECK:       // %bb.0:
196; CHECK-NEXT:    clastb z0.b, p0, z0.b, z1.b
197; CHECK-NEXT:    ret
198  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg,
199                                                                  <vscale x 16 x i8> %a,
200                                                                  <vscale x 16 x i8> %b)
201  ret <vscale x 16 x i8> %out
202}
203
204define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
205; CHECK-LABEL: clastb_i16:
206; CHECK:       // %bb.0:
207; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
208; CHECK-NEXT:    ret
209  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg,
210                                                                  <vscale x 8 x i16> %a,
211                                                                  <vscale x 8 x i16> %b)
212  ret <vscale x 8 x i16> %out
213}
214
215define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
216; CHECK-LABEL: clastb_i32:
217; CHECK:       // %bb.0:
218; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
219; CHECK-NEXT:    ret
220  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg,
221                                                                  <vscale x 4 x i32> %a,
222                                                                  <vscale x 4 x i32> %b)
223  ret <vscale x 4 x i32> %out
224}
225
226define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
227; CHECK-LABEL: clastb_i64:
228; CHECK:       // %bb.0:
229; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
230; CHECK-NEXT:    ret
231  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg,
232                                                                  <vscale x 2 x i64> %a,
233                                                                  <vscale x 2 x i64> %b)
234  ret <vscale x 2 x i64> %out
235}
236
237define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
238; CHECK-LABEL: clastb_f16:
239; CHECK:       // %bb.0:
240; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
241; CHECK-NEXT:    ret
242  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,
243                                                                   <vscale x 8 x half> %a,
244                                                                   <vscale x 8 x half> %b)
245  ret <vscale x 8 x half> %out
246}
247
248define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
249; CHECK-LABEL: clastb_bf16:
250; CHECK:       // %bb.0:
251; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
252; CHECK-NEXT:    ret
253  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
254                                                                      <vscale x 8 x bfloat> %a,
255                                                                      <vscale x 8 x bfloat> %b)
256  ret <vscale x 8 x bfloat> %out
257}
258
259define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
260; CHECK-LABEL: clastb_f32:
261; CHECK:       // %bb.0:
262; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
263; CHECK-NEXT:    ret
264  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,
265                                                                    <vscale x 4 x float> %a,
266                                                                    <vscale x 4 x float> %b)
267  ret <vscale x 4 x float> %out
268}
269
270define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
271; CHECK-LABEL: clastb_f64:
272; CHECK:       // %bb.0:
273; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
274; CHECK-NEXT:    ret
275  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg,
276                                                                     <vscale x 2 x double> %a,
277                                                                     <vscale x 2 x double> %b)
278  ret <vscale x 2 x double> %out
279}
280
281;
282; CLASTB (Scalar)
283;
284
285define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
286; CHECK-LABEL: clastb_n_i8:
287; CHECK:       // %bb.0:
288; CHECK-NEXT:    clastb w0, p0, w0, z0.b
289; CHECK-NEXT:    ret
290  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg,
291                                                    i8 %a,
292                                                    <vscale x 16 x i8> %b)
293  ret i8 %out
294}
295
296define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
297; CHECK-LABEL: clastb_n_i16:
298; CHECK:       // %bb.0:
299; CHECK-NEXT:    clastb w0, p0, w0, z0.h
300; CHECK-NEXT:    ret
301  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg,
302                                                     i16 %a,
303                                                     <vscale x 8 x i16> %b)
304  ret i16 %out
305}
306
307define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
308; CHECK-LABEL: clastb_n_i32:
309; CHECK:       // %bb.0:
310; CHECK-NEXT:    clastb w0, p0, w0, z0.s
311; CHECK-NEXT:    ret
312  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg,
313                                                     i32 %a,
314                                                     <vscale x 4 x i32> %b)
315  ret i32 %out
316}
317
318define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
319; CHECK-LABEL: clastb_n_i64:
320; CHECK:       // %bb.0:
321; CHECK-NEXT:    clastb x0, p0, x0, z0.d
322; CHECK-NEXT:    ret
323  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg,
324                                                     i64 %a,
325                                                     <vscale x 2 x i64> %b)
326  ret i64 %out
327}
328
329define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
330; CHECK-LABEL: clastb_n_f16:
331; CHECK:       // %bb.0:
332; CHECK-NEXT:    clastb h0, p0, h0, z1.h
333; CHECK-NEXT:    ret
334  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,
335                                                      half %a,
336                                                      <vscale x 8 x half> %b)
337  ret half %out
338}
339
340define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
341; CHECK-LABEL: clastb_n_bf16:
342; CHECK:       // %bb.0:
343; CHECK-NEXT:    clastb h0, p0, h0, z1.h
344; CHECK-NEXT:    ret
345  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
346                                                         bfloat %a,
347                                                         <vscale x 8 x bfloat> %b)
348  ret bfloat %out
349}
350
351define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
352; CHECK-LABEL: clastb_n_f32:
353; CHECK:       // %bb.0:
354; CHECK-NEXT:    clastb s0, p0, s0, z1.s
355; CHECK-NEXT:    ret
356  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,
357                                                       float %a,
358                                                       <vscale x 4 x float> %b)
359  ret float %out
360}
361
362define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
363; CHECK-LABEL: clastb_n_f64:
364; CHECK:       // %bb.0:
365; CHECK-NEXT:    clastb d0, p0, d0, z1.d
366; CHECK-NEXT:    ret
367  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg,
368                                                        double %a,
369                                                        <vscale x 2 x double> %b)
370  ret double %out
371}
372
373;
374; DUPQ
375;
376
377define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
378; CHECK-LABEL: dupq_i8:
379; CHECK:       // %bb.0:
380; CHECK-NEXT:    mov z0.q, q0
381; CHECK-NEXT:    ret
382  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
383  ret <vscale x 16 x i8> %out
384}
385
386define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
387; CHECK-LABEL: dupq_i16:
388; CHECK:       // %bb.0:
389; CHECK-NEXT:    mov z0.q, z0.q[1]
390; CHECK-NEXT:    ret
391  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
392  ret <vscale x 8 x i16> %out
393}
394
395define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
396; CHECK-LABEL: dupq_i32:
397; CHECK:       // %bb.0:
398; CHECK-NEXT:    mov z0.q, z0.q[2]
399; CHECK-NEXT:    ret
400  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
401  ret <vscale x 4 x i32> %out
402}
403
404define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
405; CHECK-LABEL: dupq_i64:
406; CHECK:       // %bb.0:
407; CHECK-NEXT:    mov z0.q, z0.q[3]
408; CHECK-NEXT:    ret
409  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
410  ret <vscale x 2 x i64> %out
411}
412
413define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
414; CHECK-LABEL: dupq_f16:
415; CHECK:       // %bb.0:
416; CHECK-NEXT:    mov z0.q, q0
417; CHECK-NEXT:    ret
418  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
419  ret <vscale x 8 x half> %out
420}
421
422define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
423; CHECK-LABEL: dupq_bf16:
424; CHECK:       // %bb.0:
425; CHECK-NEXT:    mov z0.q, q0
426; CHECK-NEXT:    ret
427  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
428  ret <vscale x 8 x bfloat> %out
429}
430
431define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
432; CHECK-LABEL: dupq_f32:
433; CHECK:       // %bb.0:
434; CHECK-NEXT:    mov z0.q, z0.q[1]
435; CHECK-NEXT:    ret
436  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
437  ret <vscale x 4 x float> %out
438}
439
440define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
441; CHECK-LABEL: dupq_f64:
442; CHECK:       // %bb.0:
443; CHECK-NEXT:    mov z0.q, z0.q[2]
444; CHECK-NEXT:    ret
445  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
446  ret <vscale x 2 x double> %out
447}
448
449;
450; DUPQ_LANE
451;
452
453define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
454; CHECK-LABEL: dupq_lane_i8:
455; CHECK:       // %bb.0:
456; CHECK-NEXT:    index z1.d, #0, #1
457; CHECK-NEXT:    add x8, x0, x0
458; CHECK-NEXT:    mov z2.d, x8
459; CHECK-NEXT:    and z1.d, z1.d, #0x1
460; CHECK-NEXT:    add z1.d, z1.d, z2.d
461; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
462; CHECK-NEXT:    ret
463  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
464  ret <vscale x 16 x i8> %out
465}
466
467; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
468define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
469; CHECK-LABEL: dupq_lane_i16:
470; CHECK:       // %bb.0:
471; CHECK-NEXT:    index z1.d, #0, #1
472; CHECK-NEXT:    add x8, x0, x0
473; CHECK-NEXT:    mov z2.d, x8
474; CHECK-NEXT:    and z1.d, z1.d, #0x1
475; CHECK-NEXT:    add z1.d, z1.d, z2.d
476; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
477; CHECK-NEXT:    ret
478  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
479  ret <vscale x 8 x i16> %out
480}
481
482; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
483define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
484; CHECK-LABEL: dupq_lane_i32:
485; CHECK:       // %bb.0:
486; CHECK-NEXT:    index z1.d, #0, #1
487; CHECK-NEXT:    add x8, x0, x0
488; CHECK-NEXT:    mov z2.d, x8
489; CHECK-NEXT:    and z1.d, z1.d, #0x1
490; CHECK-NEXT:    add z1.d, z1.d, z2.d
491; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
492; CHECK-NEXT:    ret
493  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
494  ret <vscale x 4 x i32> %out
495}
496
497; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
498define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
499; CHECK-LABEL: dupq_lane_i64:
500; CHECK:       // %bb.0:
501; CHECK-NEXT:    index z1.d, #0, #1
502; CHECK-NEXT:    add x8, x0, x0
503; CHECK-NEXT:    mov z2.d, x8
504; CHECK-NEXT:    and z1.d, z1.d, #0x1
505; CHECK-NEXT:    add z1.d, z1.d, z2.d
506; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
507; CHECK-NEXT:    ret
508  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
509  ret <vscale x 2 x i64> %out
510}
511
512; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
513define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
514; CHECK-LABEL: dupq_lane_f16:
515; CHECK:       // %bb.0:
516; CHECK-NEXT:    index z1.d, #0, #1
517; CHECK-NEXT:    add x8, x0, x0
518; CHECK-NEXT:    mov z2.d, x8
519; CHECK-NEXT:    and z1.d, z1.d, #0x1
520; CHECK-NEXT:    add z1.d, z1.d, z2.d
521; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
522; CHECK-NEXT:    ret
523  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
524  ret <vscale x 8 x half> %out
525}
526
527; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
528define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
529; CHECK-LABEL: dupq_lane_bf16:
530; CHECK:       // %bb.0:
531; CHECK-NEXT:    index z1.d, #0, #1
532; CHECK-NEXT:    add x8, x0, x0
533; CHECK-NEXT:    mov z2.d, x8
534; CHECK-NEXT:    and z1.d, z1.d, #0x1
535; CHECK-NEXT:    add z1.d, z1.d, z2.d
536; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
537; CHECK-NEXT:    ret
538  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
539  ret <vscale x 8 x bfloat> %out
540}
541
542; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
543define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
544; CHECK-LABEL: dupq_lane_f32:
545; CHECK:       // %bb.0:
546; CHECK-NEXT:    index z1.d, #0, #1
547; CHECK-NEXT:    add x8, x0, x0
548; CHECK-NEXT:    mov z2.d, x8
549; CHECK-NEXT:    and z1.d, z1.d, #0x1
550; CHECK-NEXT:    add z1.d, z1.d, z2.d
551; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
552; CHECK-NEXT:    ret
553  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
554  ret <vscale x 4 x float> %out
555}
556
557; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
558define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
559; CHECK-LABEL: dupq_lane_f64:
560; CHECK:       // %bb.0:
561; CHECK-NEXT:    index z1.d, #0, #1
562; CHECK-NEXT:    add x8, x0, x0
563; CHECK-NEXT:    mov z2.d, x8
564; CHECK-NEXT:    and z1.d, z1.d, #0x1
565; CHECK-NEXT:    add z1.d, z1.d, z2.d
566; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
567; CHECK-NEXT:    ret
568  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
569  ret <vscale x 2 x double> %out
570}
571
572; NOTE: Index out of range (0-3)
573define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
574; SVE-LABEL: dupq_i64_range:
575; SVE:       // %bb.0:
576; SVE-NEXT:    index z1.d, #0, #1
577; SVE-NEXT:    and z1.d, z1.d, #0x1
578; SVE-NEXT:    orr z1.d, z1.d, #0x8
579; SVE-NEXT:    tbl z0.d, { z0.d }, z1.d
580; SVE-NEXT:    ret
581;
582; SVE2-LABEL: dupq_i64_range:
583; SVE2:       // %bb.0:
584; SVE2-NEXT:    index z1.d, #0, #1
585; SVE2-NEXT:    and z1.d, z1.d, #0x1
586; SVE2-NEXT:    add z1.d, z1.d, #8 // =0x8
587; SVE2-NEXT:    tbl z0.d, { z0.d }, z1.d
588; SVE2-NEXT:    ret
589  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
590  ret <vscale x 2 x i64> %out
591}
592;
593; EXT
594;
595
596define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
597; CHECK-LABEL: dupq_f32_repeat_complex:
598; CHECK:       // %bb.0:
599; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
600; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
601; CHECK-NEXT:    mov v0.s[1], v1.s[0]
602; CHECK-NEXT:    mov z0.d, d0
603; CHECK-NEXT:    ret
604  %1 = insertelement <4 x float> undef, float %x, i64 0
605  %2 = insertelement <4 x float> %1, float %y, i64 1
606  %3 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %2, i64 0)
607  %4 = bitcast <vscale x 4 x float> %3 to <vscale x 2 x double>
608  %5 = shufflevector <vscale x 2 x double> %4, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
609  %6 = bitcast <vscale x 2 x double> %5 to <vscale x 4 x float>
610  ret <vscale x 4 x float> %6
611}
612
613define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
614; CHECK-LABEL: dupq_f16_repeat_complex:
615; CHECK:       // %bb.0:
616; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
617; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
618; CHECK-NEXT:    mov v0.h[1], v1.h[0]
619; CHECK-NEXT:    mov z0.s, s0
620; CHECK-NEXT:    ret
621  %1 = insertelement <8 x half> undef, half %x, i64 0
622  %2 = insertelement <8 x half> %1, half %y, i64 1
623  %3 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %2, i64 0)
624  %4 = bitcast <vscale x 8 x half> %3 to <vscale x 4 x float>
625  %5 = shufflevector <vscale x 4 x float> %4, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
626  %6 = bitcast <vscale x 4 x float> %5 to <vscale x 8 x half>
627  ret <vscale x 8 x half> %6
628}
629
630define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
631; SVE-LABEL: ext_i8:
632; SVE:       // %bb.0:
633; SVE-NEXT:    ext z0.b, z0.b, z1.b, #255
634; SVE-NEXT:    ret
635;
636; SVE2-LABEL: ext_i8:
637; SVE2:       // %bb.0:
638; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
639; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
640; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #255
641; SVE2-NEXT:    ret
642  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
643                                                               <vscale x 16 x i8> %b,
644                                                               i32 255)
645  ret <vscale x 16 x i8> %out
646}
647
648define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
649; SVE-LABEL: ext_i16:
650; SVE:       // %bb.0:
651; SVE-NEXT:    ext z0.b, z0.b, z1.b, #0
652; SVE-NEXT:    ret
653;
654; SVE2-LABEL: ext_i16:
655; SVE2:       // %bb.0:
656; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
657; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
658; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #0
659; SVE2-NEXT:    ret
660  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
661                                                               <vscale x 8 x i16> %b,
662                                                               i32 0)
663  ret <vscale x 8 x i16> %out
664}
665
666define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
667; SVE-LABEL: ext_i32:
668; SVE:       // %bb.0:
669; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
670; SVE-NEXT:    ret
671;
672; SVE2-LABEL: ext_i32:
673; SVE2:       // %bb.0:
674; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
675; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
676; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
677; SVE2-NEXT:    ret
678  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
679                                                               <vscale x 4 x i32> %b,
680                                                               i32 1)
681  ret <vscale x 4 x i32> %out
682}
683
684define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
685; SVE-LABEL: ext_i64:
686; SVE:       // %bb.0:
687; SVE-NEXT:    ext z0.b, z0.b, z1.b, #16
688; SVE-NEXT:    ret
689;
690; SVE2-LABEL: ext_i64:
691; SVE2:       // %bb.0:
692; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
693; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
694; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
695; SVE2-NEXT:    ret
696  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
697                                                               <vscale x 2 x i64> %b,
698                                                               i32 2)
699  ret <vscale x 2 x i64> %out
700}
701
702define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
703; SVE-LABEL: ext_bf16:
704; SVE:       // %bb.0:
705; SVE-NEXT:    ext z0.b, z0.b, z1.b, #6
706; SVE-NEXT:    ret
707;
708; SVE2-LABEL: ext_bf16:
709; SVE2:       // %bb.0:
710; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
711; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
712; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
713; SVE2-NEXT:    ret
714  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
715                                                                   <vscale x 8 x bfloat> %b,
716                                                                   i32 3)
717  ret <vscale x 8 x bfloat> %out
718}
719
720define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
721; SVE-LABEL: ext_f16:
722; SVE:       // %bb.0:
723; SVE-NEXT:    ext z0.b, z0.b, z1.b, #6
724; SVE-NEXT:    ret
725;
726; SVE2-LABEL: ext_f16:
727; SVE2:       // %bb.0:
728; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
729; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
730; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
731; SVE2-NEXT:    ret
732  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
733                                                                <vscale x 8 x half> %b,
734                                                                i32 3)
735  ret <vscale x 8 x half> %out
736}
737
738define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
739; SVE-LABEL: ext_f32:
740; SVE:       // %bb.0:
741; SVE-NEXT:    ext z0.b, z0.b, z1.b, #16
742; SVE-NEXT:    ret
743;
744; SVE2-LABEL: ext_f32:
745; SVE2:       // %bb.0:
746; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
747; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
748; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
749; SVE2-NEXT:    ret
750  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
751                                                                 <vscale x 4 x float> %b,
752                                                                 i32 4)
753  ret <vscale x 4 x float> %out
754}
755
756define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
757; SVE-LABEL: ext_f64:
758; SVE:       // %bb.0:
759; SVE-NEXT:    ext z0.b, z0.b, z1.b, #40
760; SVE-NEXT:    ret
761;
762; SVE2-LABEL: ext_f64:
763; SVE2:       // %bb.0:
764; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
765; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
766; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #40
767; SVE2-NEXT:    ret
768  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
769                                                                  <vscale x 2 x double> %b,
770                                                                  i32 5)
771  ret <vscale x 2 x double> %out
772}
773
774;
775; LASTA
776;
777
778define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
779; CHECK-LABEL: lasta_i8:
780; CHECK:       // %bb.0:
781; CHECK-NEXT:    lasta w0, p0, z0.b
782; CHECK-NEXT:    ret
783  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg,
784                                                 <vscale x 16 x i8> %a)
785  ret i8 %res
786}
787
788define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
789; CHECK-LABEL: lasta_i16:
790; CHECK:       // %bb.0:
791; CHECK-NEXT:    lasta w0, p0, z0.h
792; CHECK-NEXT:    ret
793  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg,
794                                                  <vscale x 8 x i16> %a)
795  ret i16 %res
796}
797
798define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
799; CHECK-LABEL: lasta_i32:
800; CHECK:       // %bb.0:
801; CHECK-NEXT:    lasta w0, p0, z0.s
802; CHECK-NEXT:    ret
803  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg,
804                                                  <vscale x 4 x i32> %a)
805  ret i32 %res
806}
807
808define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
809; CHECK-LABEL: lasta_i64:
810; CHECK:       // %bb.0:
811; CHECK-NEXT:    lasta x0, p0, z0.d
812; CHECK-NEXT:    ret
813  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg,
814                                                  <vscale x 2 x i64> %a)
815  ret i64 %res
816}
817
818define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
819; CHECK-LABEL: lasta_f16:
820; CHECK:       // %bb.0:
821; CHECK-NEXT:    lasta h0, p0, z0.h
822; CHECK-NEXT:    ret
823  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,
824                                                   <vscale x 8 x half> %a)
825  ret half %res
826}
827
828define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
829; CHECK-LABEL: lasta_bf16:
830; CHECK:       // %bb.0:
831; CHECK-NEXT:    lasta h0, p0, z0.h
832; CHECK-NEXT:    ret
833  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
834                                                      <vscale x 8 x bfloat> %a)
835  ret bfloat %res
836}
837
838define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
839; CHECK-LABEL: lasta_f32:
840; CHECK:       // %bb.0:
841; CHECK-NEXT:    lasta s0, p0, z0.s
842; CHECK-NEXT:    ret
843  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,
844                                                    <vscale x 4 x float> %a)
845  ret float %res
846}
847
848define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
849; CHECK-LABEL: lasta_f32_v2:
850; CHECK:       // %bb.0:
851; CHECK-NEXT:    lasta s0, p0, z0.s
852; CHECK-NEXT:    ret
853  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg,
854                                                    <vscale x 2 x float> %a)
855  ret float %res
856}
857
858define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
859; CHECK-LABEL: lasta_f64:
860; CHECK:       // %bb.0:
861; CHECK-NEXT:    lasta d0, p0, z0.d
862; CHECK-NEXT:    ret
863  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg,
864                                                     <vscale x 2 x double> %a)
865  ret double %res
866}
867
868;
869; LASTB
870;
871
872define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
873; CHECK-LABEL: lastb_i8:
874; CHECK:       // %bb.0:
875; CHECK-NEXT:    lastb w0, p0, z0.b
876; CHECK-NEXT:    ret
877  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg,
878                                                 <vscale x 16 x i8> %a)
879  ret i8 %res
880}
881
882define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
883; CHECK-LABEL: lastb_i16:
884; CHECK:       // %bb.0:
885; CHECK-NEXT:    lastb w0, p0, z0.h
886; CHECK-NEXT:    ret
887  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg,
888                                                  <vscale x 8 x i16> %a)
889  ret i16 %res
890}
891
892define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
893; CHECK-LABEL: lastb_i32:
894; CHECK:       // %bb.0:
895; CHECK-NEXT:    lastb w0, p0, z0.s
896; CHECK-NEXT:    ret
897  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg,
898                                                  <vscale x 4 x i32> %a)
899  ret i32 %res
900}
901
902define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
903; CHECK-LABEL: lastb_i64:
904; CHECK:       // %bb.0:
905; CHECK-NEXT:    lastb x0, p0, z0.d
906; CHECK-NEXT:    ret
907  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg,
908                                                  <vscale x 2 x i64> %a)
909  ret i64 %res
910}
911
912define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
913; CHECK-LABEL: lastb_f16:
914; CHECK:       // %bb.0:
915; CHECK-NEXT:    lastb h0, p0, z0.h
916; CHECK-NEXT:    ret
917  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,
918                                                   <vscale x 8 x half> %a)
919  ret half %res
920}
921
922define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
923; CHECK-LABEL: lastb_bf16:
924; CHECK:       // %bb.0:
925; CHECK-NEXT:    lastb h0, p0, z0.h
926; CHECK-NEXT:    ret
927  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
928                                                      <vscale x 8 x bfloat> %a)
929  ret bfloat %res
930}
931
932define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
933; CHECK-LABEL: lastb_f32:
934; CHECK:       // %bb.0:
935; CHECK-NEXT:    lastb s0, p0, z0.s
936; CHECK-NEXT:    ret
937  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,
938                                                    <vscale x 4 x float> %a)
939  ret float %res
940}
941
942define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
943; CHECK-LABEL: lastb_f32_v2:
944; CHECK:       // %bb.0:
945; CHECK-NEXT:    lastb s0, p0, z0.s
946; CHECK-NEXT:    ret
947  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg,
948                                                    <vscale x 2 x float> %a)
949  ret float %res
950}
951
952define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
953; CHECK-LABEL: lastb_f64:
954; CHECK:       // %bb.0:
955; CHECK-NEXT:    lastb d0, p0, z0.d
956; CHECK-NEXT:    ret
957  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg,
958                                                     <vscale x 2 x double> %a)
959  ret double %res
960}
961
962;
963; COMPACT
964;
965
966define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
967; CHECK-LABEL: compact_i32:
968; CHECK:       // %bb.0:
969; CHECK-NEXT:    compact z0.s, p0, z0.s
970; CHECK-NEXT:    ret
971  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg,
972                                                                   <vscale x 4 x i32> %a)
973  ret <vscale x 4 x i32> %out
974}
975
976define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
977; CHECK-LABEL: compact_i64:
978; CHECK:       // %bb.0:
979; CHECK-NEXT:    compact z0.d, p0, z0.d
980; CHECK-NEXT:    ret
981  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg,
982                                                                   <vscale x 2 x i64> %a)
983  ret <vscale x 2 x i64> %out
984}
985
986define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
987; CHECK-LABEL: compact_f32:
988; CHECK:       // %bb.0:
989; CHECK-NEXT:    compact z0.s, p0, z0.s
990; CHECK-NEXT:    ret
991  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg,
992                                                                     <vscale x 4 x float> %a)
993  ret <vscale x 4 x float> %out
994}
995
996define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
997; CHECK-LABEL: compact_f64:
998; CHECK:       // %bb.0:
999; CHECK-NEXT:    compact z0.d, p0, z0.d
1000; CHECK-NEXT:    ret
1001  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg,
1002                                                                      <vscale x 2 x double> %a)
1003  ret <vscale x 2 x double> %out
1004}
1005
1006;
1007; REV
1008;
1009
1010define <vscale x 16 x i1> @rev_nxv16i1(<vscale x 16 x i1> %a) {
1011; CHECK-LABEL: rev_nxv16i1:
1012; CHECK:       // %bb.0:
1013; CHECK-NEXT:    rev p0.b, p0.b
1014; CHECK-NEXT:    ret
1015  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
1016  ret <vscale x 16 x i1> %res
1017}
1018
1019define <vscale x 8 x i1> @rev_nxv8i1(<vscale x 8 x i1> %a) {
1020; CHECK-LABEL: rev_nxv8i1:
1021; CHECK:       // %bb.0:
1022; CHECK-NEXT:    rev p0.h, p0.h
1023; CHECK-NEXT:    ret
1024  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
1025  ret <vscale x 8 x i1> %res
1026}
1027
1028define <vscale x 4 x i1> @rev_nxv4i1(<vscale x 4 x i1> %a) {
1029; CHECK-LABEL: rev_nxv4i1:
1030; CHECK:       // %bb.0:
1031; CHECK-NEXT:    rev p0.s, p0.s
1032; CHECK-NEXT:    ret
1033  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
1034  ret <vscale x 4 x i1> %res
1035}
1036
1037define <vscale x 2 x i1> @rev_nxv2i1(<vscale x 2 x i1> %a) {
1038; CHECK-LABEL: rev_nxv2i1:
1039; CHECK:       // %bb.0:
1040; CHECK-NEXT:    rev p0.d, p0.d
1041; CHECK-NEXT:    ret
1042  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
1043  ret <vscale x 2 x i1> %res
1044}
1045
1046define <vscale x 16 x i1> @rev_b16(<vscale x 16 x i1> %a) {
1047; CHECK-LABEL: rev_b16:
1048; CHECK:       // %bb.0:
1049; CHECK-NEXT:    rev p0.h, p0.h
1050; CHECK-NEXT:    ret
1051  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %a)
1052  ret <vscale x 16 x i1> %res
1053}
1054
1055define <vscale x 16 x i1> @rev_b32(<vscale x 16 x i1> %a) {
1056; CHECK-LABEL: rev_b32:
1057; CHECK:       // %bb.0:
1058; CHECK-NEXT:    rev p0.s, p0.s
1059; CHECK-NEXT:    ret
1060  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %a)
1061  ret <vscale x 16 x i1> %res
1062}
1063
1064define <vscale x 16 x i1> @rev_b64(<vscale x 16 x i1> %a) {
1065; CHECK-LABEL: rev_b64:
1066; CHECK:       // %bb.0:
1067; CHECK-NEXT:    rev p0.d, p0.d
1068; CHECK-NEXT:    ret
1069  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %a)
1070  ret <vscale x 16 x i1> %res
1071}
1072
1073define <vscale x 16 x i8> @rev_i8(<vscale x 16 x i8> %a) {
1074; CHECK-LABEL: rev_i8:
1075; CHECK:       // %bb.0:
1076; CHECK-NEXT:    rev z0.b, z0.b
1077; CHECK-NEXT:    ret
1078  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
1079  ret <vscale x 16 x i8> %res
1080}
1081
1082define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
1083; CHECK-LABEL: rev_i16:
1084; CHECK:       // %bb.0:
1085; CHECK-NEXT:    rev z0.h, z0.h
1086; CHECK-NEXT:    ret
1087  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
1088  ret <vscale x 8 x i16> %res
1089}
1090
1091define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
1092; CHECK-LABEL: rev_i32:
1093; CHECK:       // %bb.0:
1094; CHECK-NEXT:    rev z0.s, z0.s
1095; CHECK-NEXT:    ret
1096  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
1097  ret <vscale x 4 x i32> %res
1098}
1099
1100define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
1101; CHECK-LABEL: rev_i64:
1102; CHECK:       // %bb.0:
1103; CHECK-NEXT:    rev z0.d, z0.d
1104; CHECK-NEXT:    ret
1105  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
1106  ret <vscale x 2 x i64> %res
1107}
1108
1109define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
1110; CHECK-LABEL: rev_bf16:
1111; CHECK:       // %bb.0:
1112; CHECK-NEXT:    rev z0.h, z0.h
1113; CHECK-NEXT:    ret
1114  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
1115  ret <vscale x 8 x bfloat> %res
1116}
1117
1118define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
1119; CHECK-LABEL: rev_f16:
1120; CHECK:       // %bb.0:
1121; CHECK-NEXT:    rev z0.h, z0.h
1122; CHECK-NEXT:    ret
1123  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
1124  ret <vscale x 8 x half> %res
1125}
1126
1127define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
1128; CHECK-LABEL: rev_f32:
1129; CHECK:       // %bb.0:
1130; CHECK-NEXT:    rev z0.s, z0.s
1131; CHECK-NEXT:    ret
1132  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
1133  ret <vscale x 4 x float> %res
1134}
1135
1136define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
1137; CHECK-LABEL: rev_f64:
1138; CHECK:       // %bb.0:
1139; CHECK-NEXT:    rev z0.d, z0.d
1140; CHECK-NEXT:    ret
1141  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
1142  ret <vscale x 2 x double> %res
1143}
1144
1145;
1146; SPLICE
1147;
1148
1149define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1150; SVE-LABEL: splice_i8:
1151; SVE:       // %bb.0:
1152; SVE-NEXT:    splice z0.b, p0, z0.b, z1.b
1153; SVE-NEXT:    ret
1154;
1155; SVE2-LABEL: splice_i8:
1156; SVE2:       // %bb.0:
1157; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1158; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1159; SVE2-NEXT:    splice z0.b, p0, { z0.b, z1.b }
1160; SVE2-NEXT:    ret
1161  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
1162                                                                  <vscale x 16 x i8> %a,
1163                                                                  <vscale x 16 x i8> %b)
1164  ret <vscale x 16 x i8> %out
1165}
1166
1167define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1168; SVE-LABEL: splice_i16:
1169; SVE:       // %bb.0:
1170; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
1171; SVE-NEXT:    ret
1172;
1173; SVE2-LABEL: splice_i16:
1174; SVE2:       // %bb.0:
1175; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1176; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1177; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
1178; SVE2-NEXT:    ret
1179  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
1180                                                                  <vscale x 8 x i16> %a,
1181                                                                  <vscale x 8 x i16> %b)
1182  ret <vscale x 8 x i16> %out
1183}
1184
1185define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1186; SVE-LABEL: splice_i32:
1187; SVE:       // %bb.0:
1188; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
1189; SVE-NEXT:    ret
1190;
1191; SVE2-LABEL: splice_i32:
1192; SVE2:       // %bb.0:
1193; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1194; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1195; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
1196; SVE2-NEXT:    ret
1197  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
1198                                                                  <vscale x 4 x i32> %a,
1199                                                                  <vscale x 4 x i32> %b)
1200  ret <vscale x 4 x i32> %out
1201}
1202
1203define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1204; SVE-LABEL: splice_i64:
1205; SVE:       // %bb.0:
1206; SVE-NEXT:    splice z0.d, p0, z0.d, z1.d
1207; SVE-NEXT:    ret
1208;
1209; SVE2-LABEL: splice_i64:
1210; SVE2:       // %bb.0:
1211; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1212; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1213; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
1214; SVE2-NEXT:    ret
1215  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
1216                                                                  <vscale x 2 x i64> %a,
1217                                                                  <vscale x 2 x i64> %b)
1218  ret <vscale x 2 x i64> %out
1219}
1220
1221define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
1222; SVE-LABEL: splice_bf16:
1223; SVE:       // %bb.0:
1224; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
1225; SVE-NEXT:    ret
1226;
1227; SVE2-LABEL: splice_bf16:
1228; SVE2:       // %bb.0:
1229; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1230; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1231; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
1232; SVE2-NEXT:    ret
1233  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
1234                                                                      <vscale x 8 x bfloat> %a,
1235                                                                      <vscale x 8 x bfloat> %b)
1236  ret <vscale x 8 x bfloat> %out
1237}
1238
1239define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
1240; SVE-LABEL: splice_f16:
1241; SVE:       // %bb.0:
1242; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
1243; SVE-NEXT:    ret
1244;
1245; SVE2-LABEL: splice_f16:
1246; SVE2:       // %bb.0:
1247; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1248; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1249; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
1250; SVE2-NEXT:    ret
1251  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
1252                                                                   <vscale x 8 x half> %a,
1253                                                                   <vscale x 8 x half> %b)
1254  ret <vscale x 8 x half> %out
1255}
1256
1257define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
1258; SVE-LABEL: splice_f32:
1259; SVE:       // %bb.0:
1260; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
1261; SVE-NEXT:    ret
1262;
1263; SVE2-LABEL: splice_f32:
1264; SVE2:       // %bb.0:
1265; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1266; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1267; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
1268; SVE2-NEXT:    ret
1269  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
1270                                                                    <vscale x 4 x float> %a,
1271                                                                    <vscale x 4 x float> %b)
1272  ret <vscale x 4 x float> %out
1273}
1274
1275define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
1276; SVE-LABEL: splice_f64:
1277; SVE:       // %bb.0:
1278; SVE-NEXT:    splice z0.d, p0, z0.d, z1.d
1279; SVE-NEXT:    ret
1280;
1281; SVE2-LABEL: splice_f64:
1282; SVE2:       // %bb.0:
1283; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
1284; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
1285; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
1286; SVE2-NEXT:    ret
1287  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,
1288                                                                     <vscale x 2 x double> %a,
1289                                                                     <vscale x 2 x double> %b)
1290  ret <vscale x 2 x double> %out
1291}
1292
1293;
1294; SUNPKHI
1295;
1296
1297define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
1298; CHECK-LABEL: sunpkhi_i16:
1299; CHECK:       // %bb.0:
1300; CHECK-NEXT:    sunpkhi z0.h, z0.b
1301; CHECK-NEXT:    ret
1302  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
1303  ret <vscale x 8 x i16> %res
1304}
1305
1306define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
1307; CHECK-LABEL: sunpkhi_i32:
1308; CHECK:       // %bb.0:
1309; CHECK-NEXT:    sunpkhi z0.s, z0.h
1310; CHECK-NEXT:    ret
1311  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
1312  ret <vscale x 4 x i32> %res
1313}
1314
1315define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
1316; CHECK-LABEL: sunpkhi_i64:
1317; CHECK:       // %bb.0:
1318; CHECK-NEXT:    sunpkhi z0.d, z0.s
1319; CHECK-NEXT:    ret
1320  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
1321  ret <vscale x 2 x i64> %res
1322}
1323
1324;
1325; SUNPKLO
1326;
1327
1328define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
1329; CHECK-LABEL: sunpklo_i16:
1330; CHECK:       // %bb.0:
1331; CHECK-NEXT:    sunpklo z0.h, z0.b
1332; CHECK-NEXT:    ret
1333  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
1334  ret <vscale x 8 x i16> %res
1335}
1336
1337define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
1338; CHECK-LABEL: sunpklo_i32:
1339; CHECK:       // %bb.0:
1340; CHECK-NEXT:    sunpklo z0.s, z0.h
1341; CHECK-NEXT:    ret
1342  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
1343  ret <vscale x 4 x i32> %res
1344}
1345
1346define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
1347; CHECK-LABEL: sunpklo_i64:
1348; CHECK:       // %bb.0:
1349; CHECK-NEXT:    sunpklo z0.d, z0.s
1350; CHECK-NEXT:    ret
1351  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
1352  ret <vscale x 2 x i64> %res
1353}
1354
1355;
1356; TBL
1357;
1358
1359define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1360; CHECK-LABEL: tbl_i8:
1361; CHECK:       // %bb.0:
1362; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
1363; CHECK-NEXT:    ret
1364  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a,
1365                                                               <vscale x 16 x i8> %b)
1366  ret <vscale x 16 x i8> %out
1367}
1368
1369define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1370; CHECK-LABEL: tbl_i16:
1371; CHECK:       // %bb.0:
1372; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
1373; CHECK-NEXT:    ret
1374  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a,
1375                                                               <vscale x 8 x i16> %b)
1376  ret <vscale x 8 x i16> %out
1377}
1378
1379define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1380; CHECK-LABEL: tbl_i32:
1381; CHECK:       // %bb.0:
1382; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
1383; CHECK-NEXT:    ret
1384  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a,
1385                                                               <vscale x 4 x i32> %b)
1386  ret <vscale x 4 x i32> %out
1387}
1388
1389define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1390; CHECK-LABEL: tbl_i64:
1391; CHECK:       // %bb.0:
1392; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
1393; CHECK-NEXT:    ret
1394  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a,
1395                                                               <vscale x 2 x i64> %b)
1396  ret <vscale x 2 x i64> %out
1397}
1398
1399define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
1400; CHECK-LABEL: tbl_f16:
1401; CHECK:       // %bb.0:
1402; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
1403; CHECK-NEXT:    ret
1404  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a,
1405                                                                <vscale x 8 x i16> %b)
1406  ret <vscale x 8 x half> %out
1407}
1408
1409define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
1410; CHECK-LABEL: tbl_bf16:
1411; CHECK:       // %bb.0:
1412; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
1413; CHECK-NEXT:    ret
1414  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a,
1415                                                                   <vscale x 8 x i16> %b)
1416  ret <vscale x 8 x bfloat> %out
1417}
1418
1419define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
1420; CHECK-LABEL: tbl_f32:
1421; CHECK:       // %bb.0:
1422; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
1423; CHECK-NEXT:    ret
1424  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a,
1425                                                                 <vscale x 4 x i32> %b)
1426  ret <vscale x 4 x float> %out
1427}
1428
1429define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
1430; CHECK-LABEL: tbl_f64:
1431; CHECK:       // %bb.0:
1432; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
1433; CHECK-NEXT:    ret
1434  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a,
1435                                                                  <vscale x 2 x i64> %b)
1436  ret <vscale x 2 x double> %out
1437}
1438
1439;
1440; UUNPKHI
1441;
1442
1443define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
1444; CHECK-LABEL: uunpkhi_i16:
1445; CHECK:       // %bb.0:
1446; CHECK-NEXT:    uunpkhi z0.h, z0.b
1447; CHECK-NEXT:    ret
1448  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
1449  ret <vscale x 8 x i16> %res
1450}
1451
1452define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
1453; CHECK-LABEL: uunpkhi_i32:
1454; CHECK:       // %bb.0:
1455; CHECK-NEXT:    uunpkhi z0.s, z0.h
1456; CHECK-NEXT:    ret
1457  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
1458  ret <vscale x 4 x i32> %res
1459}
1460
1461define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
1462; CHECK-LABEL: uunpkhi_i64:
1463; CHECK:       // %bb.0:
1464; CHECK-NEXT:    uunpkhi z0.d, z0.s
1465; CHECK-NEXT:    ret
1466  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
1467  ret <vscale x 2 x i64> %res
1468}
1469
1470;
1471; UUNPKLO
1472;
1473
1474define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
1475; CHECK-LABEL: uunpklo_i16:
1476; CHECK:       // %bb.0:
1477; CHECK-NEXT:    uunpklo z0.h, z0.b
1478; CHECK-NEXT:    ret
1479  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
1480  ret <vscale x 8 x i16> %res
1481}
1482
1483define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
1484; CHECK-LABEL: uunpklo_i32:
1485; CHECK:       // %bb.0:
1486; CHECK-NEXT:    uunpklo z0.s, z0.h
1487; CHECK-NEXT:    ret
1488  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
1489  ret <vscale x 4 x i32> %res
1490}
1491
1492define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
1493; CHECK-LABEL: uunpklo_i64:
1494; CHECK:       // %bb.0:
1495; CHECK-NEXT:    uunpklo z0.d, z0.s
1496; CHECK-NEXT:    ret
1497  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
1498  ret <vscale x 2 x i64> %res
1499}
1500
1501;
1502; TRN1
1503;
1504
1505define <vscale x 16 x i1> @trn1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1506; CHECK-LABEL: trn1_nxv16i1:
1507; CHECK:       // %bb.0:
1508; CHECK-NEXT:    trn1 p0.b, p0.b, p1.b
1509; CHECK-NEXT:    ret
1510  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a,
1511                                                                <vscale x 16 x i1> %b)
1512  ret <vscale x 16 x i1> %out
1513}
1514
1515define <vscale x 8 x i1> @trn1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
1516; CHECK-LABEL: trn1_nxv8i1:
1517; CHECK:       // %bb.0:
1518; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
1519; CHECK-NEXT:    ret
1520  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a,
1521                                                              <vscale x 8 x i1> %b)
1522  ret <vscale x 8 x i1> %out
1523}
1524
1525define <vscale x 4 x i1> @trn1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
1526; CHECK-LABEL: trn1_nxv4i1:
1527; CHECK:       // %bb.0:
1528; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
1529; CHECK-NEXT:    ret
1530  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a,
1531                                                              <vscale x 4 x i1> %b)
1532  ret <vscale x 4 x i1> %out
1533}
1534
1535define <vscale x 2 x i1> @trn1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
1536; CHECK-LABEL: trn1_nxv2i1:
1537; CHECK:       // %bb.0:
1538; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
1539; CHECK-NEXT:    ret
1540  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a,
1541                                                              <vscale x 2 x i1> %b)
1542  ret <vscale x 2 x i1> %out
1543}
1544
1545define <vscale x 16 x i1> @trn1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1546; CHECK-LABEL: trn1_b16:
1547; CHECK:       // %bb.0:
1548; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
1549; CHECK-NEXT:    ret
1550  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> %a,
1551                                                            <vscale x 16 x i1> %b)
1552  ret <vscale x 16 x i1> %out
1553}
1554
1555define <vscale x 16 x i1> @trn1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1556; CHECK-LABEL: trn1_b32:
1557; CHECK:       // %bb.0:
1558; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
1559; CHECK-NEXT:    ret
1560  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> %a,
1561                                                            <vscale x 16 x i1> %b)
1562  ret <vscale x 16 x i1> %out
1563}
1564
1565define <vscale x 16 x i1> @trn1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1566; CHECK-LABEL: trn1_b64:
1567; CHECK:       // %bb.0:
1568; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
1569; CHECK-NEXT:    ret
1570  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> %a,
1571                                                            <vscale x 16 x i1> %b)
1572  ret <vscale x 16 x i1> %out
1573}
1574
1575define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1576; CHECK-LABEL: trn1_i8:
1577; CHECK:       // %bb.0:
1578; CHECK-NEXT:    trn1 z0.b, z0.b, z1.b
1579; CHECK-NEXT:    ret
1580  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a,
1581                                                                <vscale x 16 x i8> %b)
1582  ret <vscale x 16 x i8> %out
1583}
1584
1585define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1586; CHECK-LABEL: trn1_i16:
1587; CHECK:       // %bb.0:
1588; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
1589; CHECK-NEXT:    ret
1590  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a,
1591                                                                <vscale x 8 x i16> %b)
1592  ret <vscale x 8 x i16> %out
1593}
1594
1595define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1596; CHECK-LABEL: trn1_i32:
1597; CHECK:       // %bb.0:
1598; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
1599; CHECK-NEXT:    ret
1600  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a,
1601                                                                <vscale x 4 x i32> %b)
1602  ret <vscale x 4 x i32> %out
1603}
1604
1605define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1606; CHECK-LABEL: trn1_i64:
1607; CHECK:       // %bb.0:
1608; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
1609; CHECK-NEXT:    ret
1610  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a,
1611                                                                <vscale x 2 x i64> %b)
1612  ret <vscale x 2 x i64> %out
1613}
1614
1615define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
1616; CHECK-LABEL: trn1_f16_v2:
1617; CHECK:       // %bb.0:
1618; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
1619; CHECK-NEXT:    ret
1620  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half> %a,
1621                                                                 <vscale x 2 x half> %b)
1622  ret <vscale x 2 x half> %out
1623}
1624
1625define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
1626; CHECK-LABEL: trn1_f16_v4:
1627; CHECK:       // %bb.0:
1628; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
1629; CHECK-NEXT:    ret
1630  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a,
1631                                                                 <vscale x 4 x half> %b)
1632  ret <vscale x 4 x half> %out
1633}
1634
1635define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
1636; CHECK-LABEL: trn1_bf16:
1637; CHECK:       // %bb.0:
1638; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
1639; CHECK-NEXT:    ret
1640  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a,
1641                                                                    <vscale x 8 x bfloat> %b)
1642  ret <vscale x 8 x bfloat> %out
1643}
1644
1645define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
1646; CHECK-LABEL: trn1_f16:
1647; CHECK:       // %bb.0:
1648; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
1649; CHECK-NEXT:    ret
1650  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a,
1651                                                                 <vscale x 8 x half> %b)
1652  ret <vscale x 8 x half> %out
1653}
1654
1655define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
1656; CHECK-LABEL: trn1_f32_v2:
1657; CHECK:       // %bb.0:
1658; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
1659; CHECK-NEXT:    ret
1660  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float> %a,
1661                                                                  <vscale x 2 x float> %b)
1662  ret <vscale x 2 x float> %out
1663}
1664
1665define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
1666; CHECK-LABEL: trn1_f32:
1667; CHECK:       // %bb.0:
1668; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
1669; CHECK-NEXT:    ret
1670  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a,
1671                                                                  <vscale x 4 x float> %b)
1672  ret <vscale x 4 x float> %out
1673}
1674
1675define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
1676; CHECK-LABEL: trn1_f64:
1677; CHECK:       // %bb.0:
1678; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
1679; CHECK-NEXT:    ret
1680  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a,
1681                                                                   <vscale x 2 x double> %b)
1682  ret <vscale x 2 x double> %out
1683}
1684
1685;
1686; TRN2
1687;
1688
1689define <vscale x 16 x i1> @trn2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1690; CHECK-LABEL: trn2_nxv16i1:
1691; CHECK:       // %bb.0:
1692; CHECK-NEXT:    trn2 p0.b, p0.b, p1.b
1693; CHECK-NEXT:    ret
1694  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a,
1695                                                                <vscale x 16 x i1> %b)
1696  ret <vscale x 16 x i1> %out
1697}
1698
1699define <vscale x 8 x i1> @trn2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
1700; CHECK-LABEL: trn2_nxv8i1:
1701; CHECK:       // %bb.0:
1702; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
1703; CHECK-NEXT:    ret
1704  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a,
1705                                                              <vscale x 8 x i1> %b)
1706  ret <vscale x 8 x i1> %out
1707}
1708
1709define <vscale x 4 x i1> @trn2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
1710; CHECK-LABEL: trn2_nxv4i1:
1711; CHECK:       // %bb.0:
1712; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
1713; CHECK-NEXT:    ret
1714  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a,
1715                                                              <vscale x 4 x i1> %b)
1716  ret <vscale x 4 x i1> %out
1717}
1718
1719define <vscale x 2 x i1> @trn2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
1720; CHECK-LABEL: trn2_nxv2i1:
1721; CHECK:       // %bb.0:
1722; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
1723; CHECK-NEXT:    ret
1724  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a,
1725                                                              <vscale x 2 x i1> %b)
1726  ret <vscale x 2 x i1> %out
1727}
1728
1729define <vscale x 16 x i1> @trn2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1730; CHECK-LABEL: trn2_b16:
1731; CHECK:       // %bb.0:
1732; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
1733; CHECK-NEXT:    ret
1734  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> %a,
1735                                                            <vscale x 16 x i1> %b)
1736  ret <vscale x 16 x i1> %out
1737}
1738
1739define <vscale x 16 x i1> @trn2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1740; CHECK-LABEL: trn2_b32:
1741; CHECK:       // %bb.0:
1742; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
1743; CHECK-NEXT:    ret
1744  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> %a,
1745                                                            <vscale x 16 x i1> %b)
1746  ret <vscale x 16 x i1> %out
1747}
1748
1749define <vscale x 16 x i1> @trn2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1750; CHECK-LABEL: trn2_b64:
1751; CHECK:       // %bb.0:
1752; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
1753; CHECK-NEXT:    ret
1754  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> %a,
1755                                                            <vscale x 16 x i1> %b)
1756  ret <vscale x 16 x i1> %out
1757}
1758
1759define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1760; CHECK-LABEL: trn2_i8:
1761; CHECK:       // %bb.0:
1762; CHECK-NEXT:    trn2 z0.b, z0.b, z1.b
1763; CHECK-NEXT:    ret
1764  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a,
1765                                                                <vscale x 16 x i8> %b)
1766  ret <vscale x 16 x i8> %out
1767}
1768
1769define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1770; CHECK-LABEL: trn2_i16:
1771; CHECK:       // %bb.0:
1772; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
1773; CHECK-NEXT:    ret
1774  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a,
1775                                                                <vscale x 8 x i16> %b)
1776  ret <vscale x 8 x i16> %out
1777}
1778
1779define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1780; CHECK-LABEL: trn2_i32:
1781; CHECK:       // %bb.0:
1782; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
1783; CHECK-NEXT:    ret
1784  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a,
1785                                                                <vscale x 4 x i32> %b)
1786  ret <vscale x 4 x i32> %out
1787}
1788
1789define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1790; CHECK-LABEL: trn2_i64:
1791; CHECK:       // %bb.0:
1792; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
1793; CHECK-NEXT:    ret
1794  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a,
1795                                                                <vscale x 2 x i64> %b)
1796  ret <vscale x 2 x i64> %out
1797}
1798
1799define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
1800; CHECK-LABEL: trn2_f16_v2:
1801; CHECK:       // %bb.0:
1802; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
1803; CHECK-NEXT:    ret
1804  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half> %a,
1805                                                                 <vscale x 2 x half> %b)
1806  ret <vscale x 2 x half> %out
1807}
1808
1809define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
1810; CHECK-LABEL: trn2_f16_v4:
1811; CHECK:       // %bb.0:
1812; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
1813; CHECK-NEXT:    ret
1814  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a,
1815                                                                 <vscale x 4 x half> %b)
1816  ret <vscale x 4 x half> %out
1817}
1818
1819define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
1820; CHECK-LABEL: trn2_bf16:
1821; CHECK:       // %bb.0:
1822; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
1823; CHECK-NEXT:    ret
1824  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a,
1825                                                                    <vscale x 8 x bfloat> %b)
1826  ret <vscale x 8 x bfloat> %out
1827}
1828
1829define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
1830; CHECK-LABEL: trn2_f16:
1831; CHECK:       // %bb.0:
1832; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
1833; CHECK-NEXT:    ret
1834  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a,
1835                                                                 <vscale x 8 x half> %b)
1836  ret <vscale x 8 x half> %out
1837}
1838
1839define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
1840; CHECK-LABEL: trn2_f32_v2:
1841; CHECK:       // %bb.0:
1842; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
1843; CHECK-NEXT:    ret
1844  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float> %a,
1845                                                                  <vscale x 2 x float> %b)
1846  ret <vscale x 2 x float> %out
1847}
1848
1849define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
1850; CHECK-LABEL: trn2_f32:
1851; CHECK:       // %bb.0:
1852; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
1853; CHECK-NEXT:    ret
1854  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a,
1855                                                                  <vscale x 4 x float> %b)
1856  ret <vscale x 4 x float> %out
1857}
1858
1859define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
1860; CHECK-LABEL: trn2_f64:
1861; CHECK:       // %bb.0:
1862; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
1863; CHECK-NEXT:    ret
1864  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a,
1865                                                                   <vscale x 2 x double> %b)
1866  ret <vscale x 2 x double> %out
1867}
1868
1869;
1870; UZP1
1871;
1872
1873define <vscale x 16 x i1> @uzp1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1874; CHECK-LABEL: uzp1_nxv16i1:
1875; CHECK:       // %bb.0:
1876; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
1877; CHECK-NEXT:    ret
1878  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a,
1879                                                                <vscale x 16 x i1> %b)
1880  ret <vscale x 16 x i1> %out
1881}
1882
1883define <vscale x 8 x i1> @uzp1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
1884; CHECK-LABEL: uzp1_nxv8i1:
1885; CHECK:       // %bb.0:
1886; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
1887; CHECK-NEXT:    ret
1888  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a,
1889                                                              <vscale x 8 x i1> %b)
1890  ret <vscale x 8 x i1> %out
1891}
1892
1893define <vscale x 4 x i1> @uzp1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
1894; CHECK-LABEL: uzp1_nxv4i1:
1895; CHECK:       // %bb.0:
1896; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
1897; CHECK-NEXT:    ret
1898  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a,
1899                                                              <vscale x 4 x i1> %b)
1900  ret <vscale x 4 x i1> %out
1901}
1902
1903define <vscale x 2 x i1> @uzp1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
1904; CHECK-LABEL: uzp1_nxv2i1:
1905; CHECK:       // %bb.0:
1906; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
1907; CHECK-NEXT:    ret
1908  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a,
1909                                                              <vscale x 2 x i1> %b)
1910  ret <vscale x 2 x i1> %out
1911}
1912
1913define <vscale x 16 x i1> @uzp1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1914; CHECK-LABEL: uzp1_b16:
1915; CHECK:       // %bb.0:
1916; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
1917; CHECK-NEXT:    ret
1918  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> %a,
1919                                                            <vscale x 16 x i1> %b)
1920  ret <vscale x 16 x i1> %out
1921}
1922
1923define <vscale x 16 x i1> @uzp1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1924; CHECK-LABEL: uzp1_b32:
1925; CHECK:       // %bb.0:
1926; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
1927; CHECK-NEXT:    ret
1928  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> %a,
1929                                                            <vscale x 16 x i1> %b)
1930  ret <vscale x 16 x i1> %out
1931}
1932
1933define <vscale x 16 x i1> @uzp1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1934; CHECK-LABEL: uzp1_b64:
1935; CHECK:       // %bb.0:
1936; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
1937; CHECK-NEXT:    ret
1938  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> %a,
1939                                                            <vscale x 16 x i1> %b)
1940  ret <vscale x 16 x i1> %out
1941}
1942
1943define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1944; CHECK-LABEL: uzp1_i8:
1945; CHECK:       // %bb.0:
1946; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
1947; CHECK-NEXT:    ret
1948  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a,
1949                                                                <vscale x 16 x i8> %b)
1950  ret <vscale x 16 x i8> %out
1951}
1952
1953define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1954; CHECK-LABEL: uzp1_i16:
1955; CHECK:       // %bb.0:
1956; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
1957; CHECK-NEXT:    ret
1958  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a,
1959                                                                <vscale x 8 x i16> %b)
1960  ret <vscale x 8 x i16> %out
1961}
1962
1963define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1964; CHECK-LABEL: uzp1_i32:
1965; CHECK:       // %bb.0:
1966; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
1967; CHECK-NEXT:    ret
1968  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a,
1969                                                                <vscale x 4 x i32> %b)
1970  ret <vscale x 4 x i32> %out
1971}
1972
1973define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1974; CHECK-LABEL: uzp1_i64:
1975; CHECK:       // %bb.0:
1976; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
1977; CHECK-NEXT:    ret
1978  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a,
1979                                                                <vscale x 2 x i64> %b)
1980  ret <vscale x 2 x i64> %out
1981}
1982
1983define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
1984; CHECK-LABEL: uzp1_f16_v2:
1985; CHECK:       // %bb.0:
1986; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
1987; CHECK-NEXT:    ret
1988  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half> %a,
1989                                                                 <vscale x 2 x half> %b)
1990  ret <vscale x 2 x half> %out
1991}
1992
1993define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
1994; CHECK-LABEL: uzp1_f16_v4:
1995; CHECK:       // %bb.0:
1996; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
1997; CHECK-NEXT:    ret
1998  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a,
1999                                                                 <vscale x 4 x half> %b)
2000  ret <vscale x 4 x half> %out
2001}
2002
2003define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
2004; CHECK-LABEL: uzp1_bf16:
2005; CHECK:       // %bb.0:
2006; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
2007; CHECK-NEXT:    ret
2008  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a,
2009                                                                    <vscale x 8 x bfloat> %b)
2010  ret <vscale x 8 x bfloat> %out
2011}
2012
2013define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
2014; CHECK-LABEL: uzp1_f16:
2015; CHECK:       // %bb.0:
2016; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
2017; CHECK-NEXT:    ret
2018  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a,
2019                                                                 <vscale x 8 x half> %b)
2020  ret <vscale x 8 x half> %out
2021}
2022
2023define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
2024; CHECK-LABEL: uzp1_f32_v2:
2025; CHECK:       // %bb.0:
2026; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
2027; CHECK-NEXT:    ret
2028  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float> %a,
2029                                                                  <vscale x 2 x float> %b)
2030  ret <vscale x 2 x float> %out
2031}
2032
2033define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
2034; CHECK-LABEL: uzp1_f32:
2035; CHECK:       // %bb.0:
2036; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
2037; CHECK-NEXT:    ret
2038  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a,
2039                                                                  <vscale x 4 x float> %b)
2040  ret <vscale x 4 x float> %out
2041}
2042
2043define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
2044; CHECK-LABEL: uzp1_f64:
2045; CHECK:       // %bb.0:
2046; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
2047; CHECK-NEXT:    ret
2048  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a,
2049                                                                   <vscale x 2 x double> %b)
2050  ret <vscale x 2 x double> %out
2051}
2052
2053;
2054; UZP2
2055;
2056
2057define <vscale x 16 x i1> @uzp2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2058; CHECK-LABEL: uzp2_nxv16i1:
2059; CHECK:       // %bb.0:
2060; CHECK-NEXT:    uzp2 p0.b, p0.b, p1.b
2061; CHECK-NEXT:    ret
2062  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a,
2063                                                                <vscale x 16 x i1> %b)
2064  ret <vscale x 16 x i1> %out
2065}
2066
2067define <vscale x 8 x i1> @uzp2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
2068; CHECK-LABEL: uzp2_nxv8i1:
2069; CHECK:       // %bb.0:
2070; CHECK-NEXT:    uzp2 p0.h, p0.h, p1.h
2071; CHECK-NEXT:    ret
2072  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a,
2073                                                              <vscale x 8 x i1> %b)
2074  ret <vscale x 8 x i1> %out
2075}
2076
2077define <vscale x 4 x i1> @uzp2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
2078; CHECK-LABEL: uzp2_nxv4i1:
2079; CHECK:       // %bb.0:
2080; CHECK-NEXT:    uzp2 p0.s, p0.s, p1.s
2081; CHECK-NEXT:    ret
2082  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a,
2083                                                              <vscale x 4 x i1> %b)
2084  ret <vscale x 4 x i1> %out
2085}
2086
2087define <vscale x 2 x i1> @uzp2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
2088; CHECK-LABEL: uzp2_nxv2i1:
2089; CHECK:       // %bb.0:
2090; CHECK-NEXT:    uzp2 p0.d, p0.d, p1.d
2091; CHECK-NEXT:    ret
2092  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a,
2093                                                              <vscale x 2 x i1> %b)
2094  ret <vscale x 2 x i1> %out
2095}
2096
2097define <vscale x 16 x i1> @uzp2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2098; CHECK-LABEL: uzp2_b16:
2099; CHECK:       // %bb.0:
2100; CHECK-NEXT:    uzp2 p0.h, p0.h, p1.h
2101; CHECK-NEXT:    ret
2102  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> %a,
2103                                                            <vscale x 16 x i1> %b)
2104  ret <vscale x 16 x i1> %out
2105}
2106
2107define <vscale x 16 x i1> @uzp2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2108; CHECK-LABEL: uzp2_b32:
2109; CHECK:       // %bb.0:
2110; CHECK-NEXT:    uzp2 p0.s, p0.s, p1.s
2111; CHECK-NEXT:    ret
2112  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> %a,
2113                                                            <vscale x 16 x i1> %b)
2114  ret <vscale x 16 x i1> %out
2115}
2116
2117define <vscale x 16 x i1> @uzp2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2118; CHECK-LABEL: uzp2_b64:
2119; CHECK:       // %bb.0:
2120; CHECK-NEXT:    uzp2 p0.d, p0.d, p1.d
2121; CHECK-NEXT:    ret
2122  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> %a,
2123                                                            <vscale x 16 x i1> %b)
2124  ret <vscale x 16 x i1> %out
2125}
2126
2127define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
2128; CHECK-LABEL: uzp2_i8:
2129; CHECK:       // %bb.0:
2130; CHECK-NEXT:    uzp2 z0.b, z0.b, z1.b
2131; CHECK-NEXT:    ret
2132  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a,
2133                                                                <vscale x 16 x i8> %b)
2134  ret <vscale x 16 x i8> %out
2135}
2136
2137define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
2138; CHECK-LABEL: uzp2_i16:
2139; CHECK:       // %bb.0:
2140; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
2141; CHECK-NEXT:    ret
2142  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a,
2143                                                                <vscale x 8 x i16> %b)
2144  ret <vscale x 8 x i16> %out
2145}
2146
2147define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2148; CHECK-LABEL: uzp2_i32:
2149; CHECK:       // %bb.0:
2150; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
2151; CHECK-NEXT:    ret
2152  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a,
2153                                                                <vscale x 4 x i32> %b)
2154  ret <vscale x 4 x i32> %out
2155}
2156
2157define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
2158; CHECK-LABEL: uzp2_i64:
2159; CHECK:       // %bb.0:
2160; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
2161; CHECK-NEXT:    ret
2162  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a,
2163                                                                <vscale x 2 x i64> %b)
2164  ret <vscale x 2 x i64> %out
2165}
2166
2167define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
2168; CHECK-LABEL: uzp2_f16_v2:
2169; CHECK:       // %bb.0:
2170; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
2171; CHECK-NEXT:    ret
2172  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half> %a,
2173                                                                 <vscale x 2 x half> %b)
2174  ret <vscale x 2 x half> %out
2175}
2176
2177define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
2178; CHECK-LABEL: uzp2_f16_v4:
2179; CHECK:       // %bb.0:
2180; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
2181; CHECK-NEXT:    ret
2182  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a,
2183                                                                 <vscale x 4 x half> %b)
2184  ret <vscale x 4 x half> %out
2185}
2186
2187define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
2188; CHECK-LABEL: uzp2_bf16:
2189; CHECK:       // %bb.0:
2190; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
2191; CHECK-NEXT:    ret
2192  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a,
2193                                                                    <vscale x 8 x bfloat> %b)
2194  ret <vscale x 8 x bfloat> %out
2195}
2196
2197define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
2198; CHECK-LABEL: uzp2_f16:
2199; CHECK:       // %bb.0:
2200; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
2201; CHECK-NEXT:    ret
2202  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a,
2203                                                                 <vscale x 8 x half> %b)
2204  ret <vscale x 8 x half> %out
2205}
2206
2207define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
2208; CHECK-LABEL: uzp2_f32_v2:
2209; CHECK:       // %bb.0:
2210; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
2211; CHECK-NEXT:    ret
2212  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float> %a,
2213                                                                  <vscale x 2 x float> %b)
2214  ret <vscale x 2 x float> %out
2215}
2216
2217define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
2218; CHECK-LABEL: uzp2_f32:
2219; CHECK:       // %bb.0:
2220; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
2221; CHECK-NEXT:    ret
2222  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a,
2223                                                                  <vscale x 4 x float> %b)
2224  ret <vscale x 4 x float> %out
2225}
2226
2227define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
2228; CHECK-LABEL: uzp2_f64:
2229; CHECK:       // %bb.0:
2230; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
2231; CHECK-NEXT:    ret
2232  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a,
2233                                                                   <vscale x 2 x double> %b)
2234  ret <vscale x 2 x double> %out
2235}
2236
2237;
2238; ZIP1
2239;
2240
2241define <vscale x 16 x i1> @zip1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2242; CHECK-LABEL: zip1_nxv16i1:
2243; CHECK:       // %bb.0:
2244; CHECK-NEXT:    zip1 p0.b, p0.b, p1.b
2245; CHECK-NEXT:    ret
2246  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a,
2247                                                                <vscale x 16 x i1> %b)
2248  ret <vscale x 16 x i1> %out
2249}
2250
2251define <vscale x 8 x i1> @zip1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
2252; CHECK-LABEL: zip1_nxv8i1:
2253; CHECK:       // %bb.0:
2254; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
2255; CHECK-NEXT:    ret
2256  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a,
2257                                                              <vscale x 8 x i1> %b)
2258  ret <vscale x 8 x i1> %out
2259}
2260
2261define <vscale x 4 x i1> @zip1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
2262; CHECK-LABEL: zip1_nxv4i1:
2263; CHECK:       // %bb.0:
2264; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
2265; CHECK-NEXT:    ret
2266  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a,
2267                                                              <vscale x 4 x i1> %b)
2268  ret <vscale x 4 x i1> %out
2269}
2270
2271define <vscale x 2 x i1> @zip1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
2272; CHECK-LABEL: zip1_nxv2i1:
2273; CHECK:       // %bb.0:
2274; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
2275; CHECK-NEXT:    ret
2276  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a,
2277                                                              <vscale x 2 x i1> %b)
2278  ret <vscale x 2 x i1> %out
2279}
2280
2281define <vscale x 16 x i1> @zip1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2282; CHECK-LABEL: zip1_b16:
2283; CHECK:       // %bb.0:
2284; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
2285; CHECK-NEXT:    ret
2286  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> %a,
2287                                                            <vscale x 16 x i1> %b)
2288  ret <vscale x 16 x i1> %out
2289}
2290
2291define <vscale x 16 x i1> @zip1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2292; CHECK-LABEL: zip1_b32:
2293; CHECK:       // %bb.0:
2294; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
2295; CHECK-NEXT:    ret
2296  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> %a,
2297                                                            <vscale x 16 x i1> %b)
2298  ret <vscale x 16 x i1> %out
2299}
2300
2301define <vscale x 16 x i1> @zip1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2302; CHECK-LABEL: zip1_b64:
2303; CHECK:       // %bb.0:
2304; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
2305; CHECK-NEXT:    ret
2306  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> %a,
2307                                                            <vscale x 16 x i1> %b)
2308  ret <vscale x 16 x i1> %out
2309}
2310
2311define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
2312; CHECK-LABEL: zip1_i8:
2313; CHECK:       // %bb.0:
2314; CHECK-NEXT:    zip1 z0.b, z0.b, z1.b
2315; CHECK-NEXT:    ret
2316  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a,
2317                                                                <vscale x 16 x i8> %b)
2318  ret <vscale x 16 x i8> %out
2319}
2320
2321define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
2322; CHECK-LABEL: zip1_i16:
2323; CHECK:       // %bb.0:
2324; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
2325; CHECK-NEXT:    ret
2326  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a,
2327                                                                <vscale x 8 x i16> %b)
2328  ret <vscale x 8 x i16> %out
2329}
2330
2331define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2332; CHECK-LABEL: zip1_i32:
2333; CHECK:       // %bb.0:
2334; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
2335; CHECK-NEXT:    ret
2336  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a,
2337                                                                <vscale x 4 x i32> %b)
2338  ret <vscale x 4 x i32> %out
2339}
2340
2341define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
2342; CHECK-LABEL: zip1_i64:
2343; CHECK:       // %bb.0:
2344; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
2345; CHECK-NEXT:    ret
2346  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a,
2347                                                                <vscale x 2 x i64> %b)
2348  ret <vscale x 2 x i64> %out
2349}
2350
2351define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
2352; CHECK-LABEL: zip1_f16_v2:
2353; CHECK:       // %bb.0:
2354; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
2355; CHECK-NEXT:    ret
2356  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half> %a,
2357                                                                 <vscale x 2 x half> %b)
2358  ret <vscale x 2 x half> %out
2359}
2360
2361define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
2362; CHECK-LABEL: zip1_f16_v4:
2363; CHECK:       // %bb.0:
2364; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
2365; CHECK-NEXT:    ret
2366  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a,
2367                                                                 <vscale x 4 x half> %b)
2368  ret <vscale x 4 x half> %out
2369}
2370
2371define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
2372; CHECK-LABEL: zip1_bf16:
2373; CHECK:       // %bb.0:
2374; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
2375; CHECK-NEXT:    ret
2376  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a,
2377                                                                    <vscale x 8 x bfloat> %b)
2378  ret <vscale x 8 x bfloat> %out
2379}
2380
2381define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
2382; CHECK-LABEL: zip1_f16:
2383; CHECK:       // %bb.0:
2384; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
2385; CHECK-NEXT:    ret
2386  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a,
2387                                                                 <vscale x 8 x half> %b)
2388  ret <vscale x 8 x half> %out
2389}
2390
2391define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
2392; CHECK-LABEL: zip1_f32_v2:
2393; CHECK:       // %bb.0:
2394; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
2395; CHECK-NEXT:    ret
2396  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float> %a,
2397                                                                  <vscale x 2 x float> %b)
2398  ret <vscale x 2 x float> %out
2399}
2400
2401define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
2402; CHECK-LABEL: zip1_f32:
2403; CHECK:       // %bb.0:
2404; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
2405; CHECK-NEXT:    ret
2406  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a,
2407                                                                  <vscale x 4 x float> %b)
2408  ret <vscale x 4 x float> %out
2409}
2410
2411define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
2412; CHECK-LABEL: zip1_f64:
2413; CHECK:       // %bb.0:
2414; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
2415; CHECK-NEXT:    ret
2416  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a,
2417                                                                   <vscale x 2 x double> %b)
2418  ret <vscale x 2 x double> %out
2419}
2420
2421;
2422; ZIP2
2423;
2424
2425define <vscale x 16 x i1> @zip2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2426; CHECK-LABEL: zip2_nxv16i1:
2427; CHECK:       // %bb.0:
2428; CHECK-NEXT:    zip2 p0.b, p0.b, p1.b
2429; CHECK-NEXT:    ret
2430  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a,
2431                                                                <vscale x 16 x i1> %b)
2432  ret <vscale x 16 x i1> %out
2433}
2434
2435define <vscale x 8 x i1> @zip2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
2436; CHECK-LABEL: zip2_nxv8i1:
2437; CHECK:       // %bb.0:
2438; CHECK-NEXT:    zip2 p0.h, p0.h, p1.h
2439; CHECK-NEXT:    ret
2440  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a,
2441                                                              <vscale x 8 x i1> %b)
2442  ret <vscale x 8 x i1> %out
2443}
2444
2445define <vscale x 4 x i1> @zip2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
2446; CHECK-LABEL: zip2_nxv4i1:
2447; CHECK:       // %bb.0:
2448; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
2449; CHECK-NEXT:    ret
2450  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a,
2451                                                              <vscale x 4 x i1> %b)
2452  ret <vscale x 4 x i1> %out
2453}
2454
2455define <vscale x 2 x i1> @zip2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
2456; CHECK-LABEL: zip2_nxv2i1:
2457; CHECK:       // %bb.0:
2458; CHECK-NEXT:    zip2 p0.d, p0.d, p1.d
2459; CHECK-NEXT:    ret
2460  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a,
2461                                                              <vscale x 2 x i1> %b)
2462  ret <vscale x 2 x i1> %out
2463}
2464
2465define <vscale x 16 x i1> @zip2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2466; CHECK-LABEL: zip2_b16:
2467; CHECK:       // %bb.0:
2468; CHECK-NEXT:    zip2 p0.h, p0.h, p1.h
2469; CHECK-NEXT:    ret
2470  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> %a,
2471                                                            <vscale x 16 x i1> %b)
2472  ret <vscale x 16 x i1> %out
2473}
2474
2475define <vscale x 16 x i1> @zip2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2476; CHECK-LABEL: zip2_b32:
2477; CHECK:       // %bb.0:
2478; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
2479; CHECK-NEXT:    ret
2480  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> %a,
2481                                                            <vscale x 16 x i1> %b)
2482  ret <vscale x 16 x i1> %out
2483}
2484
2485define <vscale x 16 x i1> @zip2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
2486; CHECK-LABEL: zip2_b64:
2487; CHECK:       // %bb.0:
2488; CHECK-NEXT:    zip2 p0.d, p0.d, p1.d
2489; CHECK-NEXT:    ret
2490  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> %a,
2491                                                            <vscale x 16 x i1> %b)
2492  ret <vscale x 16 x i1> %out
2493}
2494
2495define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
2496; CHECK-LABEL: zip2_i8:
2497; CHECK:       // %bb.0:
2498; CHECK-NEXT:    zip2 z0.b, z0.b, z1.b
2499; CHECK-NEXT:    ret
2500  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a,
2501                                                                <vscale x 16 x i8> %b)
2502  ret <vscale x 16 x i8> %out
2503}
2504
2505define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
2506; CHECK-LABEL: zip2_i16:
2507; CHECK:       // %bb.0:
2508; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
2509; CHECK-NEXT:    ret
2510  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a,
2511                                                                <vscale x 8 x i16> %b)
2512  ret <vscale x 8 x i16> %out
2513}
2514
2515define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
2516; CHECK-LABEL: zip2_i32:
2517; CHECK:       // %bb.0:
2518; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
2519; CHECK-NEXT:    ret
2520  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a,
2521                                                                <vscale x 4 x i32> %b)
2522  ret <vscale x 4 x i32> %out
2523}
2524
2525define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
2526; CHECK-LABEL: zip2_i64:
2527; CHECK:       // %bb.0:
2528; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
2529; CHECK-NEXT:    ret
2530  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a,
2531                                                                <vscale x 2 x i64> %b)
2532  ret <vscale x 2 x i64> %out
2533}
2534
2535define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
2536; CHECK-LABEL: zip2_f16_v2:
2537; CHECK:       // %bb.0:
2538; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
2539; CHECK-NEXT:    ret
2540  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half> %a,
2541                                                                 <vscale x 2 x half> %b)
2542  ret <vscale x 2 x half> %out
2543}
2544
2545define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
2546; CHECK-LABEL: zip2_f16_v4:
2547; CHECK:       // %bb.0:
2548; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
2549; CHECK-NEXT:    ret
2550  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a,
2551                                                                 <vscale x 4 x half> %b)
2552  ret <vscale x 4 x half> %out
2553}
2554
2555define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
2556; CHECK-LABEL: zip2_bf16:
2557; CHECK:       // %bb.0:
2558; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
2559; CHECK-NEXT:    ret
2560  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a,
2561                                                                    <vscale x 8 x bfloat> %b)
2562  ret <vscale x 8 x bfloat> %out
2563}
2564
2565define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
2566; CHECK-LABEL: zip2_f16:
2567; CHECK:       // %bb.0:
2568; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
2569; CHECK-NEXT:    ret
2570  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a,
2571                                                                 <vscale x 8 x half> %b)
2572  ret <vscale x 8 x half> %out
2573}
2574
2575define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
2576; CHECK-LABEL: zip2_f32_v2:
2577; CHECK:       // %bb.0:
2578; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
2579; CHECK-NEXT:    ret
2580  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float> %a,
2581                                                                  <vscale x 2 x float> %b)
2582  ret <vscale x 2 x float> %out
2583}
2584
2585define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
2586; CHECK-LABEL: zip2_f32:
2587; CHECK:       // %bb.0:
2588; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
2589; CHECK-NEXT:    ret
2590  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a,
2591                                                                  <vscale x 4 x float> %b)
2592  ret <vscale x 4 x float> %out
2593}
2594
2595define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
2596; CHECK-LABEL: zip2_f64:
2597; CHECK:       // %bb.0:
2598; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
2599; CHECK-NEXT:    ret
2600  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a,
2601                                                                   <vscale x 2 x double> %b)
2602  ret <vscale x 2 x double> %out
2603}
2604
2605declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2606declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2607declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2608declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2609declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
2610declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2611declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
2612declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
2613
2614declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
2615declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
2616declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
2617declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
2618declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
2619declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
2620declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
2621declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
2622
2623declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2624declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2625declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2626declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2627declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
2628declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2629declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
2630declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
2631
2632declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
2633declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
2634declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
2635declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
2636declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
2637declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
2638declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
2639declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
2640
2641declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
2642declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
2643declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
2644declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
2645
2646declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
2647declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
2648declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
2649declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
2650declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
2651declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
2652declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
2653declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)
2654
2655declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2656declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2657declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2658declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2659declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
2660declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
2661declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
2662declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
2663
2664declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
2665declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
2666declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
2667declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
2668declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
2669declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
2670declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
2671declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
2672declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
2673
2674declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
2675declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
2676declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
2677declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
2678declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
2679declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
2680declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
2681declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
2682declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
2683
2684declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
2685declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
2686declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
2687declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
2688declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>)
2689declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>)
2690declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>)
2691declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>)
2692declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>)
2693declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
2694declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
2695declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)
2696
2697declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1>)
2698declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1>)
2699declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1>)
2700
2701declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2702declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2703declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2704declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2705declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2706declare <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
2707declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
2708declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
2709
2710declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
2711declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
2712declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)
2713
2714declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
2715declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
2716declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)
2717
2718declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2719declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2720declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2721declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2722declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
2723declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
2724declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
2725declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
2726
2727declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
2728declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
2729declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)
2730
2731declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
2732declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
2733declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)
2734
2735declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
2736declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
2737declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
2738declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
2739declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2740declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2741declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2742declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2743declare <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
2744declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
2745declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2746declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
2747declare <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
2748declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
2749declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
2750
2751declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
2752declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
2753declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
2754
2755declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
2756declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
2757declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
2758declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
2759declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2760declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2761declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2762declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2763declare <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
2764declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
2765declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2766declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
2767declare <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
2768declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
2769declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
2770
2771declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
2772declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
2773declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
2774
2775declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
2776declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
2777declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
2778declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
2779declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2780declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2781declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2782declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2783declare <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
2784declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
2785declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2786declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
2787declare <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
2788declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
2789declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
2790
2791declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
2792declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
2793declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
2794
2795declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
2796declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
2797declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
2798declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
2799declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2800declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2801declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2802declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2803declare <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
2804declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
2805declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2806declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
2807declare <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
2808declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
2809declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
2810
2811declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
2812declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
2813declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
2814
2815declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
2816declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
2817declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
2818declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
2819declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2820declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2821declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2822declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2823declare <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
2824declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
2825declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2826declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
2827declare <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
2828declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
2829declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
2830
2831declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
2832declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
2833declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
2834
2835declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
2836declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
2837declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
2838declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
2839declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2840declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2841declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2842declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2843declare <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
2844declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
2845declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
2846declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
2847declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
2848declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
2849declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
2850
2851declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
2852declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
2853declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)
2854
2855declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
2856declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
2857declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
2858declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
2859declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
2860declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
2861declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
2862declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)
2863
2864; +bf16 is required for the bfloat version.
2865attributes #0 = { "target-features"="+sve,+bf16" }
2866