; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll (revision fadea4413ecbfffa4d28ad8298e0628165b543f1)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; SABA (signed absolute difference and accumulate, unpredicated)
;

define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: saba_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: saba_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: saba_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: saba_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}
51
;
; SHADD (signed halving add, predicated, merging into first operand)
;

define <vscale x 16 x i8> @shadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
99
;
; SHSUB (signed halving subtract, predicated, merging into first operand)
;

define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
147
;
; SHSUBR (signed halving subtract reversed, predicated)
;

define <vscale x 16 x i8> @shsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
195
;
; SLI (shift left and insert; immediates cover the min/max legal range per element size)
;

define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sli_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.b, z1.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sli_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.h, z1.h, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sli_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.s, z1.s, #30
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 30)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sli_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.d, z1.d, #63
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 63)
  ret <vscale x 2 x i64> %out
}
243
;
; SQABS (saturating absolute value, predicated; %a supplies the inactive lanes)
;

define <vscale x 16 x i8> @sqabs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqabs_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqabs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqabs_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqabs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqabs_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqabs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqabs_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
291
;
; SQADD (signed saturating add, predicated, merging into first operand)
;

define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
339
;
; SQDMULH (Vector)
;

define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
383
;
; SQDMULH (Indexed) — lane indices exercise the top legal lane per element size
;

define <vscale x 8 x i16> @sqdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h[7]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 7)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
  ret <vscale x 2 x i64> %out
}
420
;
; SQNEG (saturating negate, predicated; %a supplies the inactive lanes)
;

define <vscale x 16 x i8> @sqneg_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqneg_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqneg_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqneg_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqneg_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqneg_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqneg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqneg_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
468
;
; SQRDMLAH (Vectors)
;

define <vscale x 16 x i8> @sqrdmlah_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdmlah_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmlah_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlah_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlah_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlah_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlah_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlah_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}
516
;
; SQRDMLAH (Indexed)
;

define <vscale x 8 x i16> @sqrdmlah_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlah_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h[5]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 5)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlah_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlah_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlah_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlah_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, i32 1)
  ret <vscale x 2 x i64> %out
}
556
;
; SQRDMLSH (Vectors)
;

define <vscale x 16 x i8> @sqrdmlsh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdmlsh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmlsh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlsh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlsh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlsh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlsh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlsh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}
604
;
; SQRDMLSH (Indexed)
;

define <vscale x 8 x i16> @sqrdmlsh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h[4]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 4)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlsh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s[0]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 0)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlsh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, i32 1)
  ret <vscale x 2 x i64> %out
}
644
;
; SQRDMULH (Vectors)
;

define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrdmulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
688
;
; SQRDMULH (Indexed)
;

define <vscale x 8 x i16> @sqrdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h[6]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 6)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
  ret <vscale x 2 x i64> %out
}
725
726;
727; SQRSHL
728;
729
730define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
731; CHECK-LABEL: sqrshl_i8:
732; CHECK:       // %bb.0:
733; CHECK-NEXT:    sqrshl z0.b, p0/m, z0.b, z1.b
734; CHECK-NEXT:    ret
735  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
736                                                                  <vscale x 16 x i8> %a,
737                                                                  <vscale x 16 x i8> %b)
738  ret <vscale x 16 x i8> %out
739}
740
741define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
742; CHECK-LABEL: sqrshl_i16:
743; CHECK:       // %bb.0:
744; CHECK-NEXT:    sqrshl z0.h, p0/m, z0.h, z1.h
745; CHECK-NEXT:    ret
746  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
747                                                                  <vscale x 8 x i16> %a,
748                                                                  <vscale x 8 x i16> %b)
749  ret <vscale x 8 x i16> %out
750}
751
752define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
753; CHECK-LABEL: sqrshl_i32:
754; CHECK:       // %bb.0:
755; CHECK-NEXT:    sqrshl z0.s, p0/m, z0.s, z1.s
756; CHECK-NEXT:    ret
757  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
758                                                                  <vscale x 4 x i32> %a,
759                                                                  <vscale x 4 x i32> %b)
760  ret <vscale x 4 x i32> %out
761}
762
763define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
764; CHECK-LABEL: sqrshl_i64:
765; CHECK:       // %bb.0:
766; CHECK-NEXT:    sqrshl z0.d, p0/m, z0.d, z1.d
767; CHECK-NEXT:    ret
768  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
769                                                                  <vscale x 2 x i64> %a,
770                                                                  <vscale x 2 x i64> %b)
771  ret <vscale x 2 x i64> %out
772}
773
774;
775; SQRSHLR
776;
777
778define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
779; CHECK-LABEL: sqrshlr_i8:
780; CHECK:       // %bb.0:
781; CHECK-NEXT:    ptrue p0.b
782; CHECK-NEXT:    sqrshlr z0.b, p0/m, z0.b, z1.b
783; CHECK-NEXT:    ret
784  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
785  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
786                                                                 <vscale x 16 x i8> %b,
787                                                                 <vscale x 16 x i8> %a)
788  ret <vscale x 16 x i8> %out
789}
790
791define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
792; CHECK-LABEL: sqrshlr_i16:
793; CHECK:       // %bb.0:
794; CHECK-NEXT:    ptrue p0.h
795; CHECK-NEXT:    sqrshlr z0.h, p0/m, z0.h, z1.h
796; CHECK-NEXT:    ret
797  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
798  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
799                                                                 <vscale x 8 x i16> %b,
800                                                                 <vscale x 8 x i16> %a)
801  ret <vscale x 8 x i16> %out
802}
803
804define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
805; CHECK-LABEL: sqrshlr_i32:
806; CHECK:       // %bb.0:
807; CHECK-NEXT:    ptrue p0.s
808; CHECK-NEXT:    sqrshlr z0.s, p0/m, z0.s, z1.s
809; CHECK-NEXT:    ret
810  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
811  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
812                                                                 <vscale x 4 x i32> %b,
813                                                                 <vscale x 4 x i32> %a)
814  ret <vscale x 4 x i32> %out
815}
816
817define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
818; CHECK-LABEL: sqrshlr_i64:
819; CHECK:       // %bb.0:
820; CHECK-NEXT:    ptrue p0.d
821; CHECK-NEXT:    sqrshlr z0.d, p0/m, z0.d, z1.d
822; CHECK-NEXT:    ret
823  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
824  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
825                                                                 <vscale x 2 x i64> %b,
826                                                                 <vscale x 2 x i64> %a)
827  ret <vscale x 2 x i64> %out
828}
829
830define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
831; CHECK-LABEL: sqrshlr_i64_noptrue:
832; CHECK:       // %bb.0:
833; CHECK-NEXT:    sqrshl z1.d, p0/m, z1.d, z0.d
834; CHECK-NEXT:    mov z0.d, z1.d
835; CHECK-NEXT:    ret
836  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
837                                                                 <vscale x 2 x i64> %b,
838                                                                 <vscale x 2 x i64> %a)
839  ret <vscale x 2 x i64> %out
840}
841
842;
843; SQSHL (Vectors)
844;
845
846define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
847; CHECK-LABEL: sqshl_i8:
848; CHECK:       // %bb.0:
849; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
850; CHECK-NEXT:    ret
851  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
852                                                                 <vscale x 16 x i8> %a,
853                                                                 <vscale x 16 x i8> %b)
854  ret <vscale x 16 x i8> %out
855}
856
857define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
858; CHECK-LABEL: sqshl_i16:
859; CHECK:       // %bb.0:
860; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
861; CHECK-NEXT:    ret
862  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
863                                                                 <vscale x 8 x i16> %a,
864                                                                 <vscale x 8 x i16> %b)
865  ret <vscale x 8 x i16> %out
866}
867
868define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
869; CHECK-LABEL: sqshl_i32:
870; CHECK:       // %bb.0:
871; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
872; CHECK-NEXT:    ret
873  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
874                                                                 <vscale x 4 x i32> %a,
875                                                                 <vscale x 4 x i32> %b)
876  ret <vscale x 4 x i32> %out
877}
878
879define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
880; CHECK-LABEL: sqshl_i64:
881; CHECK:       // %bb.0:
882; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
883; CHECK-NEXT:    ret
884  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
885                                                                 <vscale x 2 x i64> %a,
886                                                                 <vscale x 2 x i64> %b)
887  ret <vscale x 2 x i64> %out
888}
889
890;
891; SQSHLR
892;
893
894define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
895; CHECK-LABEL: sqshlr_i8:
896; CHECK:       // %bb.0:
897; CHECK-NEXT:    ptrue p0.b
898; CHECK-NEXT:    sqshlr z0.b, p0/m, z0.b, z1.b
899; CHECK-NEXT:    ret
900  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
901  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
902                                                                 <vscale x 16 x i8> %b,
903                                                                 <vscale x 16 x i8> %a)
904  ret <vscale x 16 x i8> %out
905}
906
907define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
908; CHECK-LABEL: sqshlr_i16:
909; CHECK:       // %bb.0:
910; CHECK-NEXT:    ptrue p0.h
911; CHECK-NEXT:    sqshlr z0.h, p0/m, z0.h, z1.h
912; CHECK-NEXT:    ret
913  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
914  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
915                                                                 <vscale x 8 x i16> %b,
916                                                                 <vscale x 8 x i16> %a)
917  ret <vscale x 8 x i16> %out
918}
919
920define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
921; CHECK-LABEL: sqshlr_i32:
922; CHECK:       // %bb.0:
923; CHECK-NEXT:    ptrue p0.s
924; CHECK-NEXT:    sqshlr z0.s, p0/m, z0.s, z1.s
925; CHECK-NEXT:    ret
926  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
927  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
928                                                                 <vscale x 4 x i32> %b,
929                                                                 <vscale x 4 x i32> %a)
930  ret <vscale x 4 x i32> %out
931}
932
933define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
934; CHECK-LABEL: sqshlr_i64:
935; CHECK:       // %bb.0:
936; CHECK-NEXT:    ptrue p0.d
937; CHECK-NEXT:    sqshlr z0.d, p0/m, z0.d, z1.d
938; CHECK-NEXT:    ret
939  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
940  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
941                                                                 <vscale x 2 x i64> %b,
942                                                                 <vscale x 2 x i64> %a)
943  ret <vscale x 2 x i64> %out
944}
945
946define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
947; CHECK-LABEL: sqshlr_i64_noptrue:
948; CHECK:       // %bb.0:
949; CHECK-NEXT:    sqshl z1.d, p0/m, z1.d, z0.d
950; CHECK-NEXT:    mov z0.d, z1.d
951; CHECK-NEXT:    ret
952  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
953                                                                 <vscale x 2 x i64> %b,
954                                                                 <vscale x 2 x i64> %a)
955  ret <vscale x 2 x i64> %out
956}
957
958;
959; SQSHL (Scalar)
960;
961
962define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
963; CHECK-LABEL: sqshl_n_i8:
964; CHECK:       // %bb.0:
965; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, #7
966; CHECK-NEXT:    ret
967  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
968  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
969                                                                 <vscale x 16 x i8> %a,
970                                                                 <vscale x 16 x i8> %dup)
971  ret <vscale x 16 x i8> %out
972}
973
974define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
975; CHECK-LABEL: sqshl_n_i16:
976; CHECK:       // %bb.0:
977; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, #15
978; CHECK-NEXT:    ret
979  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
980  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
981                                                                 <vscale x 8 x i16> %a,
982                                                                 <vscale x 8 x i16> %dup)
983  ret <vscale x 8 x i16> %out
984}
985
986define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
987; CHECK-LABEL: sqshl_n_i32:
988; CHECK:       // %bb.0:
989; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, #31
990; CHECK-NEXT:    ret
991  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
992  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
993                                                                 <vscale x 4 x i32> %a,
994                                                                 <vscale x 4 x i32> %dup)
995  ret <vscale x 4 x i32> %out
996}
997
998define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
999; CHECK-LABEL: sqshl_n_i64:
1000; CHECK:       // %bb.0:
1001; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, #63
1002; CHECK-NEXT:    ret
1003  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
1004  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
1005                                                                 <vscale x 2 x i64> %a,
1006                                                                 <vscale x 2 x i64> %dup)
1007  ret <vscale x 2 x i64> %out
1008}
1009
1010define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1011; CHECK-LABEL: sqshl_n_i8_range:
1012; CHECK:       // %bb.0:
1013; CHECK-NEXT:    mov z1.b, #8 // =0x8
1014; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
1015; CHECK-NEXT:    ret
1016  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
1017  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
1018                                                                 <vscale x 16 x i8> %a,
1019                                                                 <vscale x 16 x i8> %dup)
1020  ret <vscale x 16 x i8> %out
1021}
1022
1023define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1024; CHECK-LABEL: sqshl_n_i16_range:
1025; CHECK:       // %bb.0:
1026; CHECK-NEXT:    mov z1.h, #16 // =0x10
1027; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
1028; CHECK-NEXT:    ret
1029  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
1030  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
1031                                                                 <vscale x 8 x i16> %a,
1032                                                                 <vscale x 8 x i16> %dup)
1033  ret <vscale x 8 x i16> %out
1034}
1035
1036define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1037; CHECK-LABEL: sqshl_n_i32_range:
1038; CHECK:       // %bb.0:
1039; CHECK-NEXT:    mov z1.s, #32 // =0x20
1040; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
1041; CHECK-NEXT:    ret
1042  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
1043  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
1044                                                                 <vscale x 4 x i32> %a,
1045                                                                 <vscale x 4 x i32> %dup)
1046  ret <vscale x 4 x i32> %out
1047}
1048
1049define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1050; CHECK-LABEL: sqshl_n_i64_range:
1051; CHECK:       // %bb.0:
1052; CHECK-NEXT:    mov z1.d, #64 // =0x40
1053; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
1054; CHECK-NEXT:    ret
1055  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
1056  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
1057                                                                 <vscale x 2 x i64> %a,
1058                                                                 <vscale x 2 x i64> %dup)
1059  ret <vscale x 2 x i64> %out
1060}
1061
1062;
1063; SQSHLU
1064;
1065
1066define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1067; CHECK-LABEL: sqshlu_i8:
1068; CHECK:       // %bb.0:
1069; CHECK-NEXT:    sqshlu z0.b, p0/m, z0.b, #2
1070; CHECK-NEXT:    ret
1071  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg,
1072                                                                  <vscale x 16 x i8> %a,
1073                                                                  i32 2)
1074  ret <vscale x 16 x i8> %out
1075}
1076
1077define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1078; CHECK-LABEL: sqshlu_i16:
1079; CHECK:       // %bb.0:
1080; CHECK-NEXT:    sqshlu z0.h, p0/m, z0.h, #3
1081; CHECK-NEXT:    ret
1082  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg,
1083                                                                  <vscale x 8 x i16> %a,
1084                                                                  i32 3)
1085  ret <vscale x 8 x i16> %out
1086}
1087
1088define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1089; CHECK-LABEL: sqshlu_i32:
1090; CHECK:       // %bb.0:
1091; CHECK-NEXT:    sqshlu z0.s, p0/m, z0.s, #29
1092; CHECK-NEXT:    ret
1093  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
1094                                                                  <vscale x 4 x i32> %a,
1095                                                                  i32 29)
1096  ret <vscale x 4 x i32> %out
1097}
1098
1099define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1100; CHECK-LABEL: sqshlu_i64:
1101; CHECK:       // %bb.0:
1102; CHECK-NEXT:    sqshlu z0.d, p0/m, z0.d, #62
1103; CHECK-NEXT:    ret
1104  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg,
1105                                                                  <vscale x 2 x i64> %a,
1106                                                                  i32 62)
1107  ret <vscale x 2 x i64> %out
1108}
1109
1110;
1111; SQSUB
1112;
1113
1114define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1115; CHECK-LABEL: sqsub_i8:
1116; CHECK:       // %bb.0:
1117; CHECK-NEXT:    sqsub z0.b, p0/m, z0.b, z1.b
1118; CHECK-NEXT:    ret
1119  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
1120                                                                 <vscale x 16 x i8> %a,
1121                                                                 <vscale x 16 x i8> %b)
1122  ret <vscale x 16 x i8> %out
1123}
1124
1125define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1126; CHECK-LABEL: sqsub_i16:
1127; CHECK:       // %bb.0:
1128; CHECK-NEXT:    sqsub z0.h, p0/m, z0.h, z1.h
1129; CHECK-NEXT:    ret
1130  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
1131                                                                 <vscale x 8 x i16> %a,
1132                                                                 <vscale x 8 x i16> %b)
1133  ret <vscale x 8 x i16> %out
1134}
1135
1136define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1137; CHECK-LABEL: sqsub_i32:
1138; CHECK:       // %bb.0:
1139; CHECK-NEXT:    sqsub z0.s, p0/m, z0.s, z1.s
1140; CHECK-NEXT:    ret
1141  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
1142                                                                 <vscale x 4 x i32> %a,
1143                                                                 <vscale x 4 x i32> %b)
1144  ret <vscale x 4 x i32> %out
1145}
1146
1147define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1148; CHECK-LABEL: sqsub_i64:
1149; CHECK:       // %bb.0:
1150; CHECK-NEXT:    sqsub z0.d, p0/m, z0.d, z1.d
1151; CHECK-NEXT:    ret
1152  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
1153                                                                 <vscale x 2 x i64> %a,
1154                                                                 <vscale x 2 x i64> %b)
1155  ret <vscale x 2 x i64> %out
1156}
1157
1158;
1159; SQSUBR
1160;
1161
1162define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1163; CHECK-LABEL: sqsubr_i8:
1164; CHECK:       // %bb.0:
1165; CHECK-NEXT:    sqsubr z0.b, p0/m, z0.b, z1.b
1166; CHECK-NEXT:    ret
1167  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1> %pg,
1168                                                                  <vscale x 16 x i8> %a,
1169                                                                  <vscale x 16 x i8> %b)
1170  ret <vscale x 16 x i8> %out
1171}
1172
1173define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1174; CHECK-LABEL: sqsubr_i16:
1175; CHECK:       // %bb.0:
1176; CHECK-NEXT:    sqsubr z0.h, p0/m, z0.h, z1.h
1177; CHECK-NEXT:    ret
1178  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1> %pg,
1179                                                                  <vscale x 8 x i16> %a,
1180                                                                  <vscale x 8 x i16> %b)
1181  ret <vscale x 8 x i16> %out
1182}
1183
1184define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1185; CHECK-LABEL: sqsubr_i32:
1186; CHECK:       // %bb.0:
1187; CHECK-NEXT:    sqsubr z0.s, p0/m, z0.s, z1.s
1188; CHECK-NEXT:    ret
1189  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> %pg,
1190                                                                  <vscale x 4 x i32> %a,
1191                                                                  <vscale x 4 x i32> %b)
1192  ret <vscale x 4 x i32> %out
1193}
1194
1195define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1196; CHECK-LABEL: sqsubr_i64:
1197; CHECK:       // %bb.0:
1198; CHECK-NEXT:    sqsubr z0.d, p0/m, z0.d, z1.d
1199; CHECK-NEXT:    ret
1200  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1> %pg,
1201                                                                  <vscale x 2 x i64> %a,
1202                                                                  <vscale x 2 x i64> %b)
1203  ret <vscale x 2 x i64> %out
1204}
1205
1206;
1207; SRHADD
1208;
1209
1210define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1211; CHECK-LABEL: srhadd_i8:
1212; CHECK:       // %bb.0:
1213; CHECK-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
1214; CHECK-NEXT:    ret
1215  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg,
1216                                                                  <vscale x 16 x i8> %a,
1217                                                                  <vscale x 16 x i8> %b)
1218  ret <vscale x 16 x i8> %out
1219}
1220
1221define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1222; CHECK-LABEL: srhadd_i16:
1223; CHECK:       // %bb.0:
1224; CHECK-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
1225; CHECK-NEXT:    ret
1226  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg,
1227                                                                  <vscale x 8 x i16> %a,
1228                                                                  <vscale x 8 x i16> %b)
1229  ret <vscale x 8 x i16> %out
1230}
1231
1232define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1233; CHECK-LABEL: srhadd_i32:
1234; CHECK:       // %bb.0:
1235; CHECK-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
1236; CHECK-NEXT:    ret
1237  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg,
1238                                                                  <vscale x 4 x i32> %a,
1239                                                                  <vscale x 4 x i32> %b)
1240  ret <vscale x 4 x i32> %out
1241}
1242
1243define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1244; CHECK-LABEL: srhadd_i64:
1245; CHECK:       // %bb.0:
1246; CHECK-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
1247; CHECK-NEXT:    ret
1248  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg,
1249                                                                  <vscale x 2 x i64> %a,
1250                                                                  <vscale x 2 x i64> %b)
1251  ret <vscale x 2 x i64> %out
1252}
1253
1254;
1255; SRI
1256;
1257
1258define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1259; CHECK-LABEL: sri_i8:
1260; CHECK:       // %bb.0:
1261; CHECK-NEXT:    sri z0.b, z1.b, #1
1262; CHECK-NEXT:    ret
1263  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8> %a,
1264                                                               <vscale x 16 x i8> %b,
1265                                                               i32 1)
1266  ret <vscale x 16 x i8> %out
1267}
1268
1269define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1270; CHECK-LABEL: sri_i16:
1271; CHECK:       // %bb.0:
1272; CHECK-NEXT:    sri z0.h, z1.h, #16
1273; CHECK-NEXT:    ret
1274  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16> %a,
1275                                                               <vscale x 8 x i16> %b,
1276                                                               i32 16)
1277  ret <vscale x 8 x i16> %out
1278}
1279
1280define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1281; CHECK-LABEL: sri_i32:
1282; CHECK:       // %bb.0:
1283; CHECK-NEXT:    sri z0.s, z1.s, #32
1284; CHECK-NEXT:    ret
1285  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32> %a,
1286                                                               <vscale x 4 x i32> %b,
1287                                                               i32 32);
1288  ret <vscale x 4 x i32> %out
1289}
1290
1291define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1292; CHECK-LABEL: sri_i64:
1293; CHECK:       // %bb.0:
1294; CHECK-NEXT:    sri z0.d, z1.d, #64
1295; CHECK-NEXT:    ret
1296  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64> %a,
1297                                                               <vscale x 2 x i64> %b,
1298                                                               i32 64)
1299  ret <vscale x 2 x i64> %out
1300}
1301
1302;
1303; SRSHL
1304;
1305
1306define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1307; CHECK-LABEL: srshl_i8:
1308; CHECK:       // %bb.0:
1309; CHECK-NEXT:    srshl z0.b, p0/m, z0.b, z1.b
1310; CHECK-NEXT:    ret
1311  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
1312                                                                 <vscale x 16 x i8> %a,
1313                                                                 <vscale x 16 x i8> %b)
1314  ret <vscale x 16 x i8> %out
1315}
1316
1317define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1318; CHECK-LABEL: srshl_i16:
1319; CHECK:       // %bb.0:
1320; CHECK-NEXT:    srshl z0.h, p0/m, z0.h, z1.h
1321; CHECK-NEXT:    ret
1322  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
1323                                                                 <vscale x 8 x i16> %a,
1324                                                                 <vscale x 8 x i16> %b)
1325  ret <vscale x 8 x i16> %out
1326}
1327
1328define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1329; CHECK-LABEL: srshl_i32:
1330; CHECK:       // %bb.0:
1331; CHECK-NEXT:    srshl z0.s, p0/m, z0.s, z1.s
1332; CHECK-NEXT:    ret
1333  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
1334                                                                 <vscale x 4 x i32> %a,
1335                                                                 <vscale x 4 x i32> %b)
1336  ret <vscale x 4 x i32> %out
1337}
1338
1339define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1340; CHECK-LABEL: srshl_i64:
1341; CHECK:       // %bb.0:
1342; CHECK-NEXT:    srshl z0.d, p0/m, z0.d, z1.d
1343; CHECK-NEXT:    ret
1344  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
1345                                                                 <vscale x 2 x i64> %a,
1346                                                                 <vscale x 2 x i64> %b)
1347  ret <vscale x 2 x i64> %out
1348}
1349
1350;
1351; SRSHLR
1352;
1353
1354define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1355; CHECK-LABEL: srshlr_i8:
1356; CHECK:       // %bb.0:
1357; CHECK-NEXT:    ptrue p0.b
1358; CHECK-NEXT:    srshlr z0.b, p0/m, z0.b, z1.b
1359; CHECK-NEXT:    ret
1360  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1361  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
1362                                                                 <vscale x 16 x i8> %b,
1363                                                                 <vscale x 16 x i8> %a)
1364  ret <vscale x 16 x i8> %out
1365}
1366
1367define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1368; CHECK-LABEL: srshlr_i16:
1369; CHECK:       // %bb.0:
1370; CHECK-NEXT:    ptrue p0.h
1371; CHECK-NEXT:    srshlr z0.h, p0/m, z0.h, z1.h
1372; CHECK-NEXT:    ret
1373  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1374  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
1375                                                                 <vscale x 8 x i16> %b,
1376                                                                 <vscale x 8 x i16> %a)
1377  ret <vscale x 8 x i16> %out
1378}
1379
1380define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1381; CHECK-LABEL: srshlr_i32:
1382; CHECK:       // %bb.0:
1383; CHECK-NEXT:    ptrue p0.s
1384; CHECK-NEXT:    srshlr z0.s, p0/m, z0.s, z1.s
1385; CHECK-NEXT:    ret
1386  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1387  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
1388                                                                 <vscale x 4 x i32> %b,
1389                                                                 <vscale x 4 x i32> %a)
1390  ret <vscale x 4 x i32> %out
1391}
1392
1393define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1394; CHECK-LABEL: srshlr_i64:
1395; CHECK:       // %bb.0:
1396; CHECK-NEXT:    ptrue p0.d
1397; CHECK-NEXT:    srshlr z0.d, p0/m, z0.d, z1.d
1398; CHECK-NEXT:    ret
1399  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1400  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
1401                                                                 <vscale x 2 x i64> %b,
1402                                                                 <vscale x 2 x i64> %a)
1403  ret <vscale x 2 x i64> %out
1404}
1405
1406define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1407; CHECK-LABEL: srshlr_i64_noptrue:
1408; CHECK:       // %bb.0:
1409; CHECK-NEXT:    srshl z1.d, p0/m, z1.d, z0.d
1410; CHECK-NEXT:    mov z0.d, z1.d
1411; CHECK-NEXT:    ret
1412  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
1413                                                                 <vscale x 2 x i64> %b,
1414                                                                 <vscale x 2 x i64> %a)
1415  ret <vscale x 2 x i64> %out
1416}
1417
1418;
1419; SRSHR
1420;
1421
1422define <vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1423; CHECK-LABEL: srshr_i8:
1424; CHECK:       // %bb.0:
1425; CHECK-NEXT:    srshr z0.b, p0/m, z0.b, #8
1426; CHECK-NEXT:    ret
1427  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg,
1428                                                                 <vscale x 16 x i8> %a,
1429                                                                 i32 8)
1430  ret <vscale x 16 x i8> %out
1431}
1432
1433define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1434; CHECK-LABEL: srshr_i16:
1435; CHECK:       // %bb.0:
1436; CHECK-NEXT:    srshr z0.h, p0/m, z0.h, #1
1437; CHECK-NEXT:    ret
1438  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg,
1439                                                                 <vscale x 8 x i16> %a,
1440                                                                 i32 1)
1441  ret <vscale x 8 x i16> %out
1442}
1443
1444define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1445; CHECK-LABEL: srshr_i32:
1446; CHECK:       // %bb.0:
1447; CHECK-NEXT:    srshr z0.s, p0/m, z0.s, #22
1448; CHECK-NEXT:    ret
1449  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg,
1450                                                                 <vscale x 4 x i32> %a,
1451                                                                 i32 22)
1452  ret <vscale x 4 x i32> %out
1453}
1454
1455define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1456; CHECK-LABEL: srshr_i64:
1457; CHECK:       // %bb.0:
1458; CHECK-NEXT:    srshr z0.d, p0/m, z0.d, #54
1459; CHECK-NEXT:    ret
1460  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg,
1461                                                                 <vscale x 2 x i64> %a,
1462                                                                 i32 54)
1463  ret <vscale x 2 x i64> %out
1464}
1465
1466;
1467; SRSRA
1468;
1469
1470define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1471; CHECK-LABEL: srsra_i8:
1472; CHECK:       // %bb.0:
1473; CHECK-NEXT:    srsra z0.b, z1.b, #2
1474; CHECK-NEXT:    ret
1475  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8> %a,
1476                                                                 <vscale x 16 x i8> %b,
1477                                                                 i32 2)
1478  ret <vscale x 16 x i8> %out
1479}
1480
1481define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1482; CHECK-LABEL: srsra_i16:
1483; CHECK:       // %bb.0:
1484; CHECK-NEXT:    srsra z0.h, z1.h, #15
1485; CHECK-NEXT:    ret
1486  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16> %a,
1487                                                                 <vscale x 8 x i16> %b,
1488                                                                 i32 15)
1489  ret <vscale x 8 x i16> %out
1490}
1491
1492define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1493; CHECK-LABEL: srsra_i32:
1494; CHECK:       // %bb.0:
1495; CHECK-NEXT:    srsra z0.s, z1.s, #12
1496; CHECK-NEXT:    ret
1497  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32> %a,
1498                                                                 <vscale x 4 x i32> %b,
1499                                                                 i32 12)
1500  ret <vscale x 4 x i32> %out
1501}
1502
1503define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1504; CHECK-LABEL: srsra_i64:
1505; CHECK:       // %bb.0:
1506; CHECK-NEXT:    srsra z0.d, z1.d, #44
1507; CHECK-NEXT:    ret
1508  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64> %a,
1509                                                                 <vscale x 2 x i64> %b,
1510                                                                 i32 44)
1511  ret <vscale x 2 x i64> %out
1512}
1513
1514;
1515; SSRA
1516;
1517
1518define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1519; CHECK-LABEL: ssra_i8:
1520; CHECK:       // %bb.0:
1521; CHECK-NEXT:    ssra z0.b, z1.b, #3
1522; CHECK-NEXT:    ret
1523  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8> %a,
1524                                                                <vscale x 16 x i8> %b,
1525                                                                i32 3)
1526  ret <vscale x 16 x i8> %out
1527}
1528
1529define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1530; CHECK-LABEL: ssra_i16:
1531; CHECK:       // %bb.0:
1532; CHECK-NEXT:    ssra z0.h, z1.h, #14
1533; CHECK-NEXT:    ret
1534  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16> %a,
1535                                                                <vscale x 8 x i16> %b,
1536                                                                i32 14)
1537  ret <vscale x 8 x i16> %out
1538}
1539
1540define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1541; CHECK-LABEL: ssra_i32:
1542; CHECK:       // %bb.0:
1543; CHECK-NEXT:    ssra z0.s, z1.s, #2
1544; CHECK-NEXT:    ret
1545  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32> %a,
1546                                                                <vscale x 4 x i32> %b,
1547                                                                i32 2)
1548  ret <vscale x 4 x i32> %out
1549}
1550
1551define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1552; CHECK-LABEL: ssra_i64:
1553; CHECK:       // %bb.0:
1554; CHECK-NEXT:    ssra z0.d, z1.d, #34
1555; CHECK-NEXT:    ret
1556  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64> %a,
1557                                                                <vscale x 2 x i64> %b,
1558                                                                i32 34)
1559  ret <vscale x 2 x i64> %out
1560}
1561
1562;
1563; SUQADD
1564;
1565
1566define <vscale x 16 x i8> @suqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1567; CHECK-LABEL: suqadd_i8:
1568; CHECK:       // %bb.0:
1569; CHECK-NEXT:    suqadd z0.b, p0/m, z0.b, z1.b
1570; CHECK-NEXT:    ret
1571  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1> %pg,
1572                                                                  <vscale x 16 x i8> %a,
1573                                                                  <vscale x 16 x i8> %b)
1574  ret <vscale x 16 x i8> %out
1575}
1576
1577define <vscale x 8 x i16> @suqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1578; CHECK-LABEL: suqadd_i16:
1579; CHECK:       // %bb.0:
1580; CHECK-NEXT:    suqadd z0.h, p0/m, z0.h, z1.h
1581; CHECK-NEXT:    ret
1582  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1> %pg,
1583                                                                  <vscale x 8 x i16> %a,
1584                                                                  <vscale x 8 x i16> %b)
1585  ret <vscale x 8 x i16> %out
1586}
1587
1588define <vscale x 4 x i32> @suqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1589; CHECK-LABEL: suqadd_i32:
1590; CHECK:       // %bb.0:
1591; CHECK-NEXT:    suqadd z0.s, p0/m, z0.s, z1.s
1592; CHECK-NEXT:    ret
1593  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> %pg,
1594                                                                  <vscale x 4 x i32> %a,
1595                                                                  <vscale x 4 x i32> %b)
1596  ret <vscale x 4 x i32> %out
1597}
1598
1599define <vscale x 2 x i64> @suqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1600; CHECK-LABEL: suqadd_i64:
1601; CHECK:       // %bb.0:
1602; CHECK-NEXT:    suqadd z0.d, p0/m, z0.d, z1.d
1603; CHECK-NEXT:    ret
1604  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1> %pg,
1605                                                                  <vscale x 2 x i64> %a,
1606                                                                  <vscale x 2 x i64> %b)
1607  ret <vscale x 2 x i64> %out
1608}
1609
1610;
1611; UABA
1612;
1613
1614define <vscale x 16 x i8> @uaba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
1615; CHECK-LABEL: uaba_i8:
1616; CHECK:       // %bb.0:
1617; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
1618; CHECK-NEXT:    ret
1619  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8> %a,
1620                                                                <vscale x 16 x i8> %b,
1621                                                                <vscale x 16 x i8> %c)
1622  ret <vscale x 16 x i8> %out
1623}
1624
1625define <vscale x 8 x i16> @uaba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
1626; CHECK-LABEL: uaba_i16:
1627; CHECK:       // %bb.0:
1628; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
1629; CHECK-NEXT:    ret
1630  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16> %a,
1631                                                                <vscale x 8 x i16> %b,
1632                                                                <vscale x 8 x i16> %c)
1633  ret <vscale x 8 x i16> %out
1634}
1635
1636define <vscale x 4 x i32> @uaba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
1637; CHECK-LABEL: uaba_i32:
1638; CHECK:       // %bb.0:
1639; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
1640; CHECK-NEXT:    ret
1641  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32> %a,
1642                                                                <vscale x 4 x i32> %b,
1643                                                                <vscale x 4 x i32> %c)
1644  ret <vscale x 4 x i32> %out
1645}
1646
1647define <vscale x 2 x i64> @uaba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
1648; CHECK-LABEL: uaba_i64:
1649; CHECK:       // %bb.0:
1650; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
1651; CHECK-NEXT:    ret
1652  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64> %a,
1653                                                                <vscale x 2 x i64> %b,
1654                                                                <vscale x 2 x i64> %c)
1655  ret <vscale x 2 x i64> %out
1656}
1657
1658;
1659; UHADD
1660;
1661
1662define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1663; CHECK-LABEL: uhadd_i8:
1664; CHECK:       // %bb.0:
1665; CHECK-NEXT:    uhadd z0.b, p0/m, z0.b, z1.b
1666; CHECK-NEXT:    ret
1667  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1> %pg,
1668                                                                 <vscale x 16 x i8> %a,
1669                                                                 <vscale x 16 x i8> %b)
1670  ret <vscale x 16 x i8> %out
1671}
1672
1673define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1674; CHECK-LABEL: uhadd_i16:
1675; CHECK:       // %bb.0:
1676; CHECK-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
1677; CHECK-NEXT:    ret
1678  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg,
1679                                                                 <vscale x 8 x i16> %a,
1680                                                                 <vscale x 8 x i16> %b)
1681  ret <vscale x 8 x i16> %out
1682}
1683
1684define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1685; CHECK-LABEL: uhadd_i32:
1686; CHECK:       // %bb.0:
1687; CHECK-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
1688; CHECK-NEXT:    ret
1689  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg,
1690                                                                 <vscale x 4 x i32> %a,
1691                                                                 <vscale x 4 x i32> %b)
1692  ret <vscale x 4 x i32> %out
1693}
1694
1695define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1696; CHECK-LABEL: uhadd_i64:
1697; CHECK:       // %bb.0:
1698; CHECK-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
1699; CHECK-NEXT:    ret
1700  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg,
1701                                                                 <vscale x 2 x i64> %a,
1702                                                                 <vscale x 2 x i64> %b)
1703  ret <vscale x 2 x i64> %out
1704}
1705
1706;
1707; UHSUB
1708;
1709
1710define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1711; CHECK-LABEL: uhsub_i8:
1712; CHECK:       // %bb.0:
1713; CHECK-NEXT:    uhsub z0.b, p0/m, z0.b, z1.b
1714; CHECK-NEXT:    ret
1715  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg,
1716                                                                 <vscale x 16 x i8> %a,
1717                                                                 <vscale x 16 x i8> %b)
1718  ret <vscale x 16 x i8> %out
1719}
1720
1721define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1722; CHECK-LABEL: uhsub_i16:
1723; CHECK:       // %bb.0:
1724; CHECK-NEXT:    uhsub z0.h, p0/m, z0.h, z1.h
1725; CHECK-NEXT:    ret
1726  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg,
1727                                                                 <vscale x 8 x i16> %a,
1728                                                                 <vscale x 8 x i16> %b)
1729  ret <vscale x 8 x i16> %out
1730}
1731
1732define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1733; CHECK-LABEL: uhsub_i32:
1734; CHECK:       // %bb.0:
1735; CHECK-NEXT:    uhsub z0.s, p0/m, z0.s, z1.s
1736; CHECK-NEXT:    ret
1737  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg,
1738                                                                 <vscale x 4 x i32> %a,
1739                                                                 <vscale x 4 x i32> %b)
1740  ret <vscale x 4 x i32> %out
1741}
1742
1743define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1744; CHECK-LABEL: uhsub_i64:
1745; CHECK:       // %bb.0:
1746; CHECK-NEXT:    uhsub z0.d, p0/m, z0.d, z1.d
1747; CHECK-NEXT:    ret
1748  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg,
1749                                                                 <vscale x 2 x i64> %a,
1750                                                                 <vscale x 2 x i64> %b)
1751  ret <vscale x 2 x i64> %out
1752}
1753
1754;
1755; UHSUBR
1756;
1757
1758define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1759; CHECK-LABEL: uhsubr_i8:
1760; CHECK:       // %bb.0:
1761; CHECK-NEXT:    uhsubr z0.b, p0/m, z0.b, z1.b
1762; CHECK-NEXT:    ret
1763  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg,
1764                                                                  <vscale x 16 x i8> %a,
1765                                                                  <vscale x 16 x i8> %b)
1766  ret <vscale x 16 x i8> %out
1767}
1768
1769define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1770; CHECK-LABEL: uhsubr_i16:
1771; CHECK:       // %bb.0:
1772; CHECK-NEXT:    uhsubr z0.h, p0/m, z0.h, z1.h
1773; CHECK-NEXT:    ret
1774  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg,
1775                                                                  <vscale x 8 x i16> %a,
1776                                                                  <vscale x 8 x i16> %b)
1777  ret <vscale x 8 x i16> %out
1778}
1779
1780define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1781; CHECK-LABEL: uhsubr_i32:
1782; CHECK:       // %bb.0:
1783; CHECK-NEXT:    uhsubr z0.s, p0/m, z0.s, z1.s
1784; CHECK-NEXT:    ret
1785  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg,
1786                                                                  <vscale x 4 x i32> %a,
1787                                                                  <vscale x 4 x i32> %b)
1788  ret <vscale x 4 x i32> %out
1789}
1790
1791define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1792; CHECK-LABEL: uhsubr_i64:
1793; CHECK:       // %bb.0:
1794; CHECK-NEXT:    uhsubr z0.d, p0/m, z0.d, z1.d
1795; CHECK-NEXT:    ret
1796  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg,
1797                                                                  <vscale x 2 x i64> %a,
1798                                                                  <vscale x 2 x i64> %b)
1799  ret <vscale x 2 x i64> %out
1800}
1801
1802;
1803; UQADD
1804;
1805
1806define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1807; CHECK-LABEL: uqadd_i8:
1808; CHECK:       // %bb.0:
1809; CHECK-NEXT:    uqadd z0.b, p0/m, z0.b, z1.b
1810; CHECK-NEXT:    ret
1811  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
1812                                                                 <vscale x 16 x i8> %a,
1813                                                                 <vscale x 16 x i8> %b)
1814  ret <vscale x 16 x i8> %out
1815}
1816
1817define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1818; CHECK-LABEL: uqadd_i16:
1819; CHECK:       // %bb.0:
1820; CHECK-NEXT:    uqadd z0.h, p0/m, z0.h, z1.h
1821; CHECK-NEXT:    ret
1822  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
1823                                                                 <vscale x 8 x i16> %a,
1824                                                                 <vscale x 8 x i16> %b)
1825  ret <vscale x 8 x i16> %out
1826}
1827
1828define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1829; CHECK-LABEL: uqadd_i32:
1830; CHECK:       // %bb.0:
1831; CHECK-NEXT:    uqadd z0.s, p0/m, z0.s, z1.s
1832; CHECK-NEXT:    ret
1833  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
1834                                                                 <vscale x 4 x i32> %a,
1835                                                                 <vscale x 4 x i32> %b)
1836  ret <vscale x 4 x i32> %out
1837}
1838
1839define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1840; CHECK-LABEL: uqadd_i64:
1841; CHECK:       // %bb.0:
1842; CHECK-NEXT:    uqadd z0.d, p0/m, z0.d, z1.d
1843; CHECK-NEXT:    ret
1844  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
1845                                                                 <vscale x 2 x i64> %a,
1846                                                                 <vscale x 2 x i64> %b)
1847  ret <vscale x 2 x i64> %out
1848}
1849
1850;
1851; UQRSHL
1852;
1853
1854define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1855; CHECK-LABEL: uqrshl_i8:
1856; CHECK:       // %bb.0:
1857; CHECK-NEXT:    uqrshl z0.b, p0/m, z0.b, z1.b
1858; CHECK-NEXT:    ret
1859  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
1860                                                                  <vscale x 16 x i8> %a,
1861                                                                  <vscale x 16 x i8> %b)
1862  ret <vscale x 16 x i8> %out
1863}
1864
1865define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1866; CHECK-LABEL: uqrshl_i16:
1867; CHECK:       // %bb.0:
1868; CHECK-NEXT:    uqrshl z0.h, p0/m, z0.h, z1.h
1869; CHECK-NEXT:    ret
1870  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
1871                                                                  <vscale x 8 x i16> %a,
1872                                                                  <vscale x 8 x i16> %b)
1873  ret <vscale x 8 x i16> %out
1874}
1875
1876define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1877; CHECK-LABEL: uqrshl_i32:
1878; CHECK:       // %bb.0:
1879; CHECK-NEXT:    uqrshl z0.s, p0/m, z0.s, z1.s
1880; CHECK-NEXT:    ret
1881  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
1882                                                                  <vscale x 4 x i32> %a,
1883                                                                  <vscale x 4 x i32> %b)
1884  ret <vscale x 4 x i32> %out
1885}
1886
1887define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1888; CHECK-LABEL: uqrshl_i64:
1889; CHECK:       // %bb.0:
1890; CHECK-NEXT:    uqrshl z0.d, p0/m, z0.d, z1.d
1891; CHECK-NEXT:    ret
1892  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
1893                                                                  <vscale x 2 x i64> %a,
1894                                                                  <vscale x 2 x i64> %b)
1895  ret <vscale x 2 x i64> %out
1896}
1897
1898;
1899; UQRSHLR
1900;
1901
1902define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1903; CHECK-LABEL: uqrshlr_i8:
1904; CHECK:       // %bb.0:
1905; CHECK-NEXT:    ptrue p0.b
1906; CHECK-NEXT:    uqrshlr z0.b, p0/m, z0.b, z1.b
1907; CHECK-NEXT:    ret
1908  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1909  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
1910                                                                 <vscale x 16 x i8> %b,
1911                                                                 <vscale x 16 x i8> %a)
1912  ret <vscale x 16 x i8> %out
1913}
1914
1915define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1916; CHECK-LABEL: uqrshlr_i16:
1917; CHECK:       // %bb.0:
1918; CHECK-NEXT:    ptrue p0.h
1919; CHECK-NEXT:    uqrshlr z0.h, p0/m, z0.h, z1.h
1920; CHECK-NEXT:    ret
1921  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1922  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
1923                                                                 <vscale x 8 x i16> %b,
1924                                                                 <vscale x 8 x i16> %a)
1925  ret <vscale x 8 x i16> %out
1926}
1927
1928define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1929; CHECK-LABEL: uqrshlr_i32:
1930; CHECK:       // %bb.0:
1931; CHECK-NEXT:    ptrue p0.s
1932; CHECK-NEXT:    uqrshlr z0.s, p0/m, z0.s, z1.s
1933; CHECK-NEXT:    ret
1934  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1935  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
1936                                                                 <vscale x 4 x i32> %b,
1937                                                                 <vscale x 4 x i32> %a)
1938  ret <vscale x 4 x i32> %out
1939}
1940
1941define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1942; CHECK-LABEL: uqrshlr_i64:
1943; CHECK:       // %bb.0:
1944; CHECK-NEXT:    ptrue p0.d
1945; CHECK-NEXT:    uqrshlr z0.d, p0/m, z0.d, z1.d
1946; CHECK-NEXT:    ret
1947  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1948  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
1949                                                                 <vscale x 2 x i64> %b,
1950                                                                 <vscale x 2 x i64> %a)
1951  ret <vscale x 2 x i64> %out
1952}
1953
1954define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
1955; CHECK-LABEL: uqrshlr_i64_noptrue:
1956; CHECK:       // %bb.0:
1957; CHECK-NEXT:    uqrshl z1.d, p0/m, z1.d, z0.d
1958; CHECK-NEXT:    mov z0.d, z1.d
1959; CHECK-NEXT:    ret
1960  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
1961                                                                 <vscale x 2 x i64> %b,
1962                                                                 <vscale x 2 x i64> %a)
1963  ret <vscale x 2 x i64> %out
1964}
1965
1966;
1967; UQSHL (Vectors)
1968;
1969
1970define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1971; CHECK-LABEL: uqshl_i8:
1972; CHECK:       // %bb.0:
1973; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
1974; CHECK-NEXT:    ret
1975  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
1976                                                                 <vscale x 16 x i8> %a,
1977                                                                 <vscale x 16 x i8> %b)
1978  ret <vscale x 16 x i8> %out
1979}
1980
1981define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1982; CHECK-LABEL: uqshl_i16:
1983; CHECK:       // %bb.0:
1984; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
1985; CHECK-NEXT:    ret
1986  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
1987                                                                 <vscale x 8 x i16> %a,
1988                                                                 <vscale x 8 x i16> %b)
1989  ret <vscale x 8 x i16> %out
1990}
1991
1992define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1993; CHECK-LABEL: uqshl_i32:
1994; CHECK:       // %bb.0:
1995; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
1996; CHECK-NEXT:    ret
1997  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
1998                                                                 <vscale x 4 x i32> %a,
1999                                                                 <vscale x 4 x i32> %b)
2000  ret <vscale x 4 x i32> %out
2001}
2002
2003define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
2004; CHECK-LABEL: uqshl_i64:
2005; CHECK:       // %bb.0:
2006; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
2007; CHECK-NEXT:    ret
2008  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
2009                                                                 <vscale x 2 x i64> %a,
2010                                                                 <vscale x 2 x i64> %b)
2011  ret <vscale x 2 x i64> %out
2012}
2013
2014;
2015; UQSHLR
2016;
2017
2018define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
2019; CHECK-LABEL: uqshlr_i8:
2020; CHECK:       // %bb.0:
2021; CHECK-NEXT:    ptrue p0.b
2022; CHECK-NEXT:    uqshlr z0.b, p0/m, z0.b, z1.b
2023; CHECK-NEXT:    ret
2024  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
2025  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
2026                                                                 <vscale x 16 x i8> %b,
2027                                                                 <vscale x 16 x i8> %a)
2028  ret <vscale x 16 x i8> %out
2029}
2030
2031define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
2032; CHECK-LABEL: uqshlr_i16:
2033; CHECK:       // %bb.0:
2034; CHECK-NEXT:    ptrue p0.h
2035; CHECK-NEXT:    uqshlr z0.h, p0/m, z0.h, z1.h
2036; CHECK-NEXT:    ret
2037  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
2038  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
2039                                                                 <vscale x 8 x i16> %b,
2040                                                                 <vscale x 8 x i16> %a)
2041  ret <vscale x 8 x i16> %out
2042}
2043
; As uqshlr_i8 but for .s elements: all-active predicate + swapped operands -> UQSHLR.
define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}
2056
; As uqshlr_i8 but for .d elements: all-active predicate + swapped operands -> UQSHLR.
define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}
2069
; Negative test: with an arbitrary (not provably all-active) predicate the
; reversed-form rewrite is not legal, so UQSHL plus a register move is emitted.
define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}
2081
2082;
2083; UQSHL (Scalar)
2084;
2085
; A splatted shift amount of 7 (max valid for .b) folds into the immediate form of UQSHL.
define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: uqshl_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}
2097
; A splatted shift amount of 15 (max valid for .h) folds into the immediate form of UQSHL.
define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshl_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}
2109
; A splatted shift amount of 31 (max valid for .s) folds into the immediate form of UQSHL.
define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshl_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}
2121
; A splatted shift amount of 63 (max valid for .d) folds into the immediate form of UQSHL.
define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshl_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}
2133
; Negative test: a splat of 8 is outside the .b immediate range, so the splat is
; materialized with MOV and the register-operand form of UQSHL is used.
define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: uqshl_n_i8_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}
2146
; Negative test: a splat of 16 is outside the .h immediate range -> MOV + register form.
define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshl_n_i16_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}
2159
; Negative test: a splat of 32 is outside the .s immediate range -> MOV + register form.
define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshl_n_i32_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}
2172
; Negative test: a splat of 64 is outside the .d immediate range -> MOV + register form.
define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshl_n_i64_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}
2185
2186;
2187; UQSUB
2188;
2189
; Predicated uqsub intrinsic on .b elements lowers to the predicated UQSUB instruction.
define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}
2200
; Predicated uqsub intrinsic on .h elements lowers to the predicated UQSUB instruction.
define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}
2211
; Predicated uqsub intrinsic on .s elements lowers to the predicated UQSUB instruction.
define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2222
; Predicated uqsub intrinsic on .d elements lowers to the predicated UQSUB instruction.
define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
2233
2234;
2235; UQSUBR
2236;
2237
; The dedicated uqsubr intrinsic on .b elements lowers directly to UQSUBR.
define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}
2248
; The dedicated uqsubr intrinsic on .h elements lowers directly to UQSUBR.
define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}
2259
; The dedicated uqsubr intrinsic on .s elements lowers directly to UQSUBR.
define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2270
; The dedicated uqsubr intrinsic on .d elements lowers directly to UQSUBR.
define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
2281
2282;
2283; URECPE
2284;
2285
; urecpe takes (passthru, predicate, operand); the result merges into %a (z0)
; with the estimate computed from %b (z1) under %pg.
define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urecpe_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2296
2297;
2298; URHADD
2299;
2300
; Predicated urhadd intrinsic on .b elements lowers to the predicated URHADD instruction.
define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urhadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}
2311
; Predicated urhadd intrinsic on .h elements lowers to the predicated URHADD instruction.
define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urhadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}
2322
; Predicated urhadd intrinsic on .s elements lowers to the predicated URHADD instruction.
define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urhadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2333
; Predicated urhadd intrinsic on .d elements lowers to the predicated URHADD instruction.
; (Continuation-argument indentation realigned to match the sibling tests in this file;
; whitespace only, no change to the IR or the autogenerated CHECK lines.)
define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urhadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
2344
2345;
2346; URSHL
2347;
2348
; Predicated urshl intrinsic on .b elements lowers to the predicated URSHL instruction.
define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}
2359
; Predicated urshl intrinsic on .h elements lowers to the predicated URSHL instruction.
define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}
2370
; Predicated urshl intrinsic on .s elements lowers to the predicated URSHL instruction.
define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2381
; Predicated urshl intrinsic on .d elements lowers to the predicated URSHL instruction.
define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
2392
2393;
2394; URSHLR
2395;
2396
; With an all-active predicate and operands swapped (%b, %a), codegen may use the
; reversed form URSHLR so the result lands in z0 without a register move.
define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    urshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}
2409
; As urshlr_i8 but for .h elements: all-active predicate + swapped operands -> URSHLR.
define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    urshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}
2422
; As urshlr_i8 but for .s elements: all-active predicate + swapped operands -> URSHLR.
define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    urshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}
2435
; As urshlr_i8 but for .d elements: all-active predicate + swapped operands -> URSHLR.
define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    urshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}
2448
; Negative test: with an arbitrary (not provably all-active) predicate the
; reversed-form rewrite is not legal, so URSHL plus a register move is emitted.
define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}
2460
2461;
2462; URSHR
2463;
2464
; urshr with an immediate shift of 4 lowers to the immediate form of URSHR on .b elements.
define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: urshr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.b, p0/m, z0.b, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 i32 4)
  ret <vscale x 16 x i8> %out
}
2475
; urshr with an immediate shift of 13 lowers to the immediate form of URSHR on .h elements.
define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: urshr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.h, p0/m, z0.h, #13
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 i32 13)
  ret <vscale x 8 x i16> %out
}
2486
; urshr with an immediate shift of 1 lowers to the immediate form of URSHR on .s elements.
define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: urshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.s, p0/m, z0.s, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 i32 1)
  ret <vscale x 4 x i32> %out
}
2497
; urshr with an immediate shift of 24 lowers to the immediate form of URSHR on .d elements.
define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: urshr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.d, p0/m, z0.d, #24
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 i32 24)
  ret <vscale x 2 x i64> %out
}
2508
2509;
2510; URSQRTE
2511;
2512
; ursqrte takes (passthru, predicate, operand); the result merges into %a (z0)
; with the estimate computed from %b (z1) under %pg.
define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ursqrte_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursqrte z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2523
2524;
2525; URSRA
2526;
2527
; ursra accumulates into the first operand: z0 += (z1 rounding-shifted right by #5).
define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ursra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.b, z1.b, #5
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 i32 5)
  ret <vscale x 16 x i8> %out
}
2538
; ursra on .h elements with immediate shift #12.
define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ursra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.h, z1.h, #12
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 i32 12)
  ret <vscale x 8 x i16> %out
}
2549
; ursra on .s elements with immediate shift #31.
define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ursra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.s, z1.s, #31
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 i32 31)
  ret <vscale x 4 x i32> %out
}
2560
; ursra on .d elements with immediate shift #14.
define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ursra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.d, z1.d, #14
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 i32 14)
  ret <vscale x 2 x i64> %out
}
2571
2572;
2573; USQADD
2574;
2575
; Predicated usqadd intrinsic on .b elements lowers to the predicated USQADD instruction.
define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}
2586
; Predicated usqadd intrinsic on .h elements lowers to the predicated USQADD instruction.
define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}
2597
; Predicated usqadd intrinsic on .s elements lowers to the predicated USQADD instruction.
define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
2608
; Predicated usqadd intrinsic on .d elements lowers to the predicated USQADD instruction.
define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: usqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
2619
2620;
2621; USRA
2622;
2623
; usra accumulates into the first operand: z0 += (z1 shifted right by #6).
define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.b, z1.b, #6
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 6)
  ret <vscale x 16 x i8> %out
}
2634
; usra on .h elements with immediate shift #11.
define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.h, z1.h, #11
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 11)
  ret <vscale x 8 x i16> %out
}
2645
; usra on .s elements with immediate shift #21.
define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.s, z1.s, #21
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 21)
  ret <vscale x 4 x i32> %out
}
2656
; usra on .d elements with immediate shift #4.
define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: usra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 4)
  ret <vscale x 2 x i64> %out
}
2667
2668declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
2669declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
2670declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
2671declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
2672
2673declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2674declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2675declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2676declare <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2677
2678declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2679declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2680declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2681declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2682
2683declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2684declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2685declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2686declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2687
2688declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2689declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2690declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2691declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2692
2693declare <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2694declare <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2695declare <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2696declare <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2697
2698declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
2699declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
2700declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
2701declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
2702
2703declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2704declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2705declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2706declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2707
2708declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2709declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2710declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2711declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2712
2713declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2714declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2715declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2716
2717declare <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
2718declare <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
2719declare <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
2720declare <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
2721
2722declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2723declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2724declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2725declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2726
2727declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2728declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2729declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2730
2731declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2732declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2733declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2734declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2735
2736declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2737declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2738declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2739
2740declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
2741declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
2742declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
2743declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
2744
2745declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2746declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2747declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2748
2749declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2750declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2751declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2752declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2753
2754declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2755declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2756declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2757declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2758
2759declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
2760declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
2761declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
2762declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
2763
2764declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2765declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2766declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2767declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2768
2769declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2770declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2771declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2772declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2773
2774declare <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2775declare <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2776declare <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2777declare <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2778
2779declare <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2780declare <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2781declare <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2782declare <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2783
2784declare <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2785declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2786declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2787declare <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2788
2789declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
2790declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
2791declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
2792declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
2793
2794declare <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2795declare <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2796declare <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2797declare <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2798
2799declare <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2800declare <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2801declare <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2802declare <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2803
2804declare <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2805declare <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2806declare <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2807declare <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2808
2809declare <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2810declare <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2811declare <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2812declare <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2813
2814declare <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2815declare <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2816declare <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2817declare <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2818
2819declare <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2820declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2821declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2822declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2823
2824declare <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2825declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2826declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2827declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2828
2829declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2830declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2831declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2832declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2833
2834declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2835declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2836declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2837declare <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2838
2839declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2840declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2841declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2842declare <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2843
2844declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2845declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2846declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2847declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2848
2849declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2850declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2851declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2852declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2853
2854declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
2855
2856declare <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2857declare <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2858declare <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2859declare <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2860
2861declare <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2862declare <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2863declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2864declare <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2865
2866declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
2867declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
2868declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
2869declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
2870
2871declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
2872
2873declare <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2874declare <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2875declare <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2876declare <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2877
2878declare <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2879declare <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2880declare <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2881declare <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2882
2883declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
2884declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
2885declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
2886declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
2887
2888declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
2889declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
2890declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
2891declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
2892