; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
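; The widening DSP instructions follow the SVE2 bottom/top naming scheme: the
; "B" forms read the even-numbered elements of the narrower source vectors and
; the "T" forms the odd-numbered elements, widening b->h, h->s or s->d. Both
; RUN lines share the same CHECK lines, so each instruction must also be
; selected when in streaming SVE mode.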

;
; SABALB
;
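; The accumulating forms (SABAL*, UABAL*) take the accumulator as their first
; operand, so the destination register stays distinct from the two narrower
; sources (z0.h, z1.b, z2.b rather than z0.h, z0.b, z1.b).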

define <vscale x 8 x i16> @sabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sabalb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabalb z0.h, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sabalb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabalb z0.s, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sabalb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabalb z0.d, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; SABALT
;

define <vscale x 8 x i16> @sabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sabalt_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabalt z0.h, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sabalt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabalt z0.s, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sabalt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabalt z0.d, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; SABDLB
;

define <vscale x 8 x i16> @sabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabdlb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdlb z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabdlb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdlb z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabdlb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdlb z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SABDLT
;

define <vscale x 8 x i16> @sabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabdlt_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdlt z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabdlt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdlt z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabdlt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdlt z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDLB
;

define <vscale x 8 x i16> @saddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddlb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddlb z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddlb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddlb z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddlb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddlb z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDLT
;

define <vscale x 8 x i16> @saddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddlt_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddlt z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddlt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddlt z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddlt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddlt z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDWB
;

define <vscale x 8 x i16> @saddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddwb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddwb z0.h, z0.h, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddwb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddwb z0.s, z0.s, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddwb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddwb z0.d, z0.d, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDWT
;

define <vscale x 8 x i16> @saddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddwt_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddwt z0.h, z0.h, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddwt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddwt z0.s, z0.s, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddwt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddwt z0.d, z0.d, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}


;
; SMULLB (Vectors)
;

define <vscale x 8 x i16> @smullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smullb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullb z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullb z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullb z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLB (Indexed)
;
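; The indexed forms multiply by a single element selected from within each
; 128-bit segment of the second source vector, so the lane immediate ranges
; over 0-7 for .h elements and 0-3 for .s elements.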

define <vscale x 4 x i32> @smullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullb_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullb z0.s, z0.h, z1.h[4]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       i32 4)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullb_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullb z0.d, z0.s, z1.s[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       i32 3)
  ret <vscale x 2 x i64> %out
}

;
; SMULLT (Vectors)
;

define <vscale x 8 x i16> @smullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smullt_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullt z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullt z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullt z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLT (Indexed)
;

define <vscale x 4 x i32> @smullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullt_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullt z0.s, z0.h, z1.h[5]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       i32 5)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullt_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smullt z0.d, z0.s, z1.s[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       i32 2)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLB (Vectors)
;
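; The SQDMULL* forms double the signed product and saturate it to the widened
; element type, unlike the plain SMULL* forms above.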

define <vscale x 8 x i16> @sqdmullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmullb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullb z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullb z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullb z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLB (Indexed)
;

define <vscale x 4 x i32> @sqdmullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullb_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullb z0.s, z0.h, z1.h[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullb_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullb z0.d, z0.s, z1.s[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLT (Vectors)
;

define <vscale x 8 x i16> @sqdmullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmullt_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullt z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullt z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullt z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLT (Indexed)
;

define <vscale x 4 x i32> @sqdmullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullt_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullt z0.s, z0.h, z1.h[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullt_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmullt z0.d, z0.s, z1.s[0]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         i32 0)
  ret <vscale x 2 x i64> %out
}

;
; SSUBLB
;

define <vscale x 8 x i16> @ssublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssublb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssublb z0.h, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssublb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssublb z0.s, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssublb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssublb z0.d, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSHLLB
;
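; The shift-left-long immediate must lie in the range 0 to (source element
; size in bits - 1); the SSHLL*/USHLL* tests exercise values across that
; range, including both boundaries.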
561
562define <vscale x 8 x i16> @sshllb_b(<vscale x 16 x i8> %a) {
563; CHECK-LABEL: sshllb_b:
564; CHECK:       // %bb.0:
565; CHECK-NEXT:    sshllb z0.h, z0.b, #0
566; CHECK-NEXT:    ret
567  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8> %a, i32 0)
568  ret <vscale x 8 x i16> %out
569}
570
571define <vscale x 4 x i32> @sshllb_h(<vscale x 8 x i16> %a) {
572; CHECK-LABEL: sshllb_h:
573; CHECK:       // %bb.0:
574; CHECK-NEXT:    sshllb z0.s, z0.h, #1
575; CHECK-NEXT:    ret
576  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16> %a, i32 1)
577  ret <vscale x 4 x i32> %out
578}
579
580define <vscale x 2 x i64> @sshllb_s(<vscale x 4 x i32> %a) {
581; CHECK-LABEL: sshllb_s:
582; CHECK:       // %bb.0:
583; CHECK-NEXT:    sshllb z0.d, z0.s, #2
584; CHECK-NEXT:    ret
585  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32> %a, i32 2)
586  ret <vscale x 2 x i64> %out
587}
588
589;
590; SSHLLT
591;
592
593define <vscale x 8 x i16> @sshllt_b(<vscale x 16 x i8> %a) {
594; CHECK-LABEL: sshllt_b:
595; CHECK:       // %bb.0:
596; CHECK-NEXT:    sshllt z0.h, z0.b, #3
597; CHECK-NEXT:    ret
598  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8> %a, i32 3)
599  ret <vscale x 8 x i16> %out
600}
601
602define <vscale x 4 x i32> @sshllt_h(<vscale x 8 x i16> %a) {
603; CHECK-LABEL: sshllt_h:
604; CHECK:       // %bb.0:
605; CHECK-NEXT:    sshllt z0.s, z0.h, #4
606; CHECK-NEXT:    ret
607  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16> %a, i32 4)
608  ret <vscale x 4 x i32> %out
609}
610
611define <vscale x 2 x i64> @sshllt_s(<vscale x 4 x i32> %a) {
612; CHECK-LABEL: sshllt_s:
613; CHECK:       // %bb.0:
614; CHECK-NEXT:    sshllt z0.d, z0.s, #5
615; CHECK-NEXT:    ret
616  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32> %a, i32 5)
617  ret <vscale x 2 x i64> %out
618}
619
620;
621; SSUBLT
622;
623
624define <vscale x 8 x i16> @ssublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
625; CHECK-LABEL: ssublt_b:
626; CHECK:       // %bb.0:
627; CHECK-NEXT:    ssublt z0.h, z0.b, z1.b
628; CHECK-NEXT:    ret
629  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8> %a,
630                                                                  <vscale x 16 x i8> %b)
631  ret <vscale x 8 x i16> %out
632}
633
634define <vscale x 4 x i32> @ssublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
635; CHECK-LABEL: ssublt_h:
636; CHECK:       // %bb.0:
637; CHECK-NEXT:    ssublt z0.s, z0.h, z1.h
638; CHECK-NEXT:    ret
639  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16> %a,
640                                                                  <vscale x 8 x i16> %b)
641  ret <vscale x 4 x i32> %out
642}
643
644define <vscale x 2 x i64> @ssublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
645; CHECK-LABEL: ssublt_s:
646; CHECK:       // %bb.0:
647; CHECK-NEXT:    ssublt z0.d, z0.s, z1.s
648; CHECK-NEXT:    ret
649  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32> %a,
650                                                                  <vscale x 4 x i32> %b)
651  ret <vscale x 2 x i64> %out
652}
653
654;
655; SSUBWB
656;
657
658define <vscale x 8 x i16> @ssubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
659; CHECK-LABEL: ssubwb_b:
660; CHECK:       // %bb.0:
661; CHECK-NEXT:    ssubwb z0.h, z0.h, z1.b
662; CHECK-NEXT:    ret
663  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16> %a,
664                                                                  <vscale x 16 x i8> %b)
665  ret <vscale x 8 x i16> %out
666}
667
668define <vscale x 4 x i32> @ssubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
669; CHECK-LABEL: ssubwb_h:
670; CHECK:       // %bb.0:
671; CHECK-NEXT:    ssubwb z0.s, z0.s, z1.h
672; CHECK-NEXT:    ret
673  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32> %a,
674                                                                  <vscale x 8 x i16> %b)
675  ret <vscale x 4 x i32> %out
676}
677
678define <vscale x 2 x i64> @ssubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
679; CHECK-LABEL: ssubwb_s:
680; CHECK:       // %bb.0:
681; CHECK-NEXT:    ssubwb z0.d, z0.d, z1.s
682; CHECK-NEXT:    ret
683  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64> %a,
684                                                                  <vscale x 4 x i32> %b)
685  ret <vscale x 2 x i64> %out
686}
687
688;
689; SSUBWT
690;
691
692define <vscale x 8 x i16> @ssubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
693; CHECK-LABEL: ssubwt_b:
694; CHECK:       // %bb.0:
695; CHECK-NEXT:    ssubwt z0.h, z0.h, z1.b
696; CHECK-NEXT:    ret
697  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16> %a,
698                                                                  <vscale x 16 x i8> %b)
699  ret <vscale x 8 x i16> %out
700}
701
702define <vscale x 4 x i32> @ssubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
703; CHECK-LABEL: ssubwt_h:
704; CHECK:       // %bb.0:
705; CHECK-NEXT:    ssubwt z0.s, z0.s, z1.h
706; CHECK-NEXT:    ret
707  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32> %a,
708                                                                  <vscale x 8 x i16> %b)
709  ret <vscale x 4 x i32> %out
710}
711
712define <vscale x 2 x i64> @ssubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
713; CHECK-LABEL: ssubwt_s:
714; CHECK:       // %bb.0:
715; CHECK-NEXT:    ssubwt z0.d, z0.d, z1.s
716; CHECK-NEXT:    ret
717  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64> %a,
718                                                                  <vscale x 4 x i32> %b)
719  ret <vscale x 2 x i64> %out
720}
721
722;
723; UABALB
724;
725
726define <vscale x 8 x i16> @uabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
727; CHECK-LABEL: uabalb_b:
728; CHECK:       // %bb.0:
729; CHECK-NEXT:    uabalb z0.h, z1.b, z2.b
730; CHECK-NEXT:    ret
731  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16> %a,
732                                                                  <vscale x 16 x i8> %b,
733                                                                  <vscale x 16 x i8> %c)
734  ret <vscale x 8 x i16> %out
735}
736
737define <vscale x 4 x i32> @uabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
738; CHECK-LABEL: uabalb_h:
739; CHECK:       // %bb.0:
740; CHECK-NEXT:    uabalb z0.s, z1.h, z2.h
741; CHECK-NEXT:    ret
742  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32> %a,
743                                                                  <vscale x 8 x i16> %b,
744                                                                  <vscale x 8 x i16> %c)
745  ret <vscale x 4 x i32> %out
746}
747
748define <vscale x 2 x i64> @uabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
749; CHECK-LABEL: uabalb_s:
750; CHECK:       // %bb.0:
751; CHECK-NEXT:    uabalb z0.d, z1.s, z2.s
752; CHECK-NEXT:    ret
753  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64> %a,
754                                                                  <vscale x 4 x i32> %b,
755                                                                  <vscale x 4 x i32> %c)
756  ret <vscale x 2 x i64> %out
757}
758
759;
760; UABALT
761;
762
763define <vscale x 8 x i16> @uabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
764; CHECK-LABEL: uabalt_b:
765; CHECK:       // %bb.0:
766; CHECK-NEXT:    uabalt z0.h, z1.b, z2.b
767; CHECK-NEXT:    ret
768  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16> %a,
769                                                                  <vscale x 16 x i8> %b,
770                                                                  <vscale x 16 x i8> %c)
771  ret <vscale x 8 x i16> %out
772}
773
774define <vscale x 4 x i32> @uabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
775; CHECK-LABEL: uabalt_h:
776; CHECK:       // %bb.0:
777; CHECK-NEXT:    uabalt z0.s, z1.h, z2.h
778; CHECK-NEXT:    ret
779  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32> %a,
780                                                                  <vscale x 8 x i16> %b,
781                                                                  <vscale x 8 x i16> %c)
782  ret <vscale x 4 x i32> %out
783}
784
785define <vscale x 2 x i64> @uabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
786; CHECK-LABEL: uabalt_s:
787; CHECK:       // %bb.0:
788; CHECK-NEXT:    uabalt z0.d, z1.s, z2.s
789; CHECK-NEXT:    ret
790  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64> %a,
791                                                                  <vscale x 4 x i32> %b,
792                                                                  <vscale x 4 x i32> %c)
793  ret <vscale x 2 x i64> %out
794}
795
796;
797; UABDLB
798;
799
800define <vscale x 8 x i16> @uabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
801; CHECK-LABEL: uabdlb_b:
802; CHECK:       // %bb.0:
803; CHECK-NEXT:    uabdlb z0.h, z0.b, z1.b
804; CHECK-NEXT:    ret
805  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8> %a,
806                                                                  <vscale x 16 x i8> %b)
807  ret <vscale x 8 x i16> %out
808}
809
810define <vscale x 4 x i32> @uabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
811; CHECK-LABEL: uabdlb_h:
812; CHECK:       // %bb.0:
813; CHECK-NEXT:    uabdlb z0.s, z0.h, z1.h
814; CHECK-NEXT:    ret
815  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16> %a,
816                                                                  <vscale x 8 x i16> %b)
817  ret <vscale x 4 x i32> %out
818}
819
820define <vscale x 2 x i64> @uabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
821; CHECK-LABEL: uabdlb_s:
822; CHECK:       // %bb.0:
823; CHECK-NEXT:    uabdlb z0.d, z0.s, z1.s
824; CHECK-NEXT:    ret
825  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32> %a,
826                                                                  <vscale x 4 x i32> %b)
827  ret <vscale x 2 x i64> %out
828}
829
830;
831; UABDLT
832;
833
834define <vscale x 8 x i16> @uabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
835; CHECK-LABEL: uabdlt_b:
836; CHECK:       // %bb.0:
837; CHECK-NEXT:    uabdlt z0.h, z0.b, z1.b
838; CHECK-NEXT:    ret
839  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8> %a,
840                                                                  <vscale x 16 x i8> %b)
841  ret <vscale x 8 x i16> %out
842}
843
844define <vscale x 4 x i32> @uabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
845; CHECK-LABEL: uabdlt_h:
846; CHECK:       // %bb.0:
847; CHECK-NEXT:    uabdlt z0.s, z0.h, z1.h
848; CHECK-NEXT:    ret
849  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16> %a,
850                                                                  <vscale x 8 x i16> %b)
851  ret <vscale x 4 x i32> %out
852}
853
854define <vscale x 2 x i64> @uabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
855; CHECK-LABEL: uabdlt_s:
856; CHECK:       // %bb.0:
857; CHECK-NEXT:    uabdlt z0.d, z0.s, z1.s
858; CHECK-NEXT:    ret
859  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32> %a,
860                                                                  <vscale x 4 x i32> %b)
861  ret <vscale x 2 x i64> %out
862}
863
864;
865; UADDLB
866;
867
868define <vscale x 8 x i16> @uaddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
869; CHECK-LABEL: uaddlb_b:
870; CHECK:       // %bb.0:
871; CHECK-NEXT:    uaddlb z0.h, z0.b, z1.b
872; CHECK-NEXT:    ret
873  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8> %a,
874                                                                  <vscale x 16 x i8> %b)
875  ret <vscale x 8 x i16> %out
876}
877
878define <vscale x 4 x i32> @uaddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
879; CHECK-LABEL: uaddlb_h:
880; CHECK:       // %bb.0:
881; CHECK-NEXT:    uaddlb z0.s, z0.h, z1.h
882; CHECK-NEXT:    ret
883  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16> %a,
884                                                                  <vscale x 8 x i16> %b)
885  ret <vscale x 4 x i32> %out
886}
887
888define <vscale x 2 x i64> @uaddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
889; CHECK-LABEL: uaddlb_s:
890; CHECK:       // %bb.0:
891; CHECK-NEXT:    uaddlb z0.d, z0.s, z1.s
892; CHECK-NEXT:    ret
893  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32> %a,
894                                                                  <vscale x 4 x i32> %b)
895  ret <vscale x 2 x i64> %out
896}
897
898;
899; UADDLT
900;
901
902define <vscale x 8 x i16> @uaddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
903; CHECK-LABEL: uaddlt_b:
904; CHECK:       // %bb.0:
905; CHECK-NEXT:    uaddlt z0.h, z0.b, z1.b
906; CHECK-NEXT:    ret
907  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8> %a,
908                                                                  <vscale x 16 x i8> %b)
909  ret <vscale x 8 x i16> %out
910}
911
912define <vscale x 4 x i32> @uaddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
913; CHECK-LABEL: uaddlt_h:
914; CHECK:       // %bb.0:
915; CHECK-NEXT:    uaddlt z0.s, z0.h, z1.h
916; CHECK-NEXT:    ret
917  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16> %a,
918                                                                  <vscale x 8 x i16> %b)
919  ret <vscale x 4 x i32> %out
920}
921
922define <vscale x 2 x i64> @uaddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
923; CHECK-LABEL: uaddlt_s:
924; CHECK:       // %bb.0:
925; CHECK-NEXT:    uaddlt z0.d, z0.s, z1.s
926; CHECK-NEXT:    ret
927  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32> %a,
928                                                                  <vscale x 4 x i32> %b)
929  ret <vscale x 2 x i64> %out
930}
931
932;
933; UADDWB
934;
935
936define <vscale x 8 x i16> @uaddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
937; CHECK-LABEL: uaddwb_b:
938; CHECK:       // %bb.0:
939; CHECK-NEXT:    uaddwb z0.h, z0.h, z1.b
940; CHECK-NEXT:    ret
941  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16> %a,
942                                                                  <vscale x 16 x i8> %b)
943  ret <vscale x 8 x i16> %out
944}
945
946define <vscale x 4 x i32> @uaddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
947; CHECK-LABEL: uaddwb_h:
948; CHECK:       // %bb.0:
949; CHECK-NEXT:    uaddwb z0.s, z0.s, z1.h
950; CHECK-NEXT:    ret
951  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32> %a,
952                                                                  <vscale x 8 x i16> %b)
953  ret <vscale x 4 x i32> %out
954}
955
956define <vscale x 2 x i64> @uaddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
957; CHECK-LABEL: uaddwb_s:
958; CHECK:       // %bb.0:
959; CHECK-NEXT:    uaddwb z0.d, z0.d, z1.s
960; CHECK-NEXT:    ret
961  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64> %a,
962                                                                  <vscale x 4 x i32> %b)
963  ret <vscale x 2 x i64> %out
964}
965
966;
967; UADDWT
968;
969
970define <vscale x 8 x i16> @uaddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
971; CHECK-LABEL: uaddwt_b:
972; CHECK:       // %bb.0:
973; CHECK-NEXT:    uaddwt z0.h, z0.h, z1.b
974; CHECK-NEXT:    ret
975  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16> %a,
976                                                                  <vscale x 16 x i8> %b)
977  ret <vscale x 8 x i16> %out
978}
979
980define <vscale x 4 x i32> @uaddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
981; CHECK-LABEL: uaddwt_h:
982; CHECK:       // %bb.0:
983; CHECK-NEXT:    uaddwt z0.s, z0.s, z1.h
984; CHECK-NEXT:    ret
985  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32> %a,
986                                                                  <vscale x 8 x i16> %b)
987  ret <vscale x 4 x i32> %out
988}
989
990define <vscale x 2 x i64> @uaddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
991; CHECK-LABEL: uaddwt_s:
992; CHECK:       // %bb.0:
993; CHECK-NEXT:    uaddwt z0.d, z0.d, z1.s
994; CHECK-NEXT:    ret
995  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64> %a,
996                                                                  <vscale x 4 x i32> %b)
997  ret <vscale x 2 x i64> %out
998}
999
1000;
1001; UMULLB (Vectors)
1002;
1003
1004define <vscale x 8 x i16> @umullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1005; CHECK-LABEL: umullb_b:
1006; CHECK:       // %bb.0:
1007; CHECK-NEXT:    umullb z0.h, z0.b, z1.b
1008; CHECK-NEXT:    ret
1009  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8> %a,
1010                                                                  <vscale x 16 x i8> %b)
1011  ret <vscale x 8 x i16> %out
1012}
1013
1014define <vscale x 4 x i32> @umullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1015; CHECK-LABEL: umullb_h:
1016; CHECK:       // %bb.0:
1017; CHECK-NEXT:    umullb z0.s, z0.h, z1.h
1018; CHECK-NEXT:    ret
1019  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16> %a,
1020                                                                  <vscale x 8 x i16> %b)
1021  ret <vscale x 4 x i32> %out
1022}
1023
1024define <vscale x 2 x i64> @umullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1025; CHECK-LABEL: umullb_s:
1026; CHECK:       // %bb.0:
1027; CHECK-NEXT:    umullb z0.d, z0.s, z1.s
1028; CHECK-NEXT:    ret
1029  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32> %a,
1030                                                                  <vscale x 4 x i32> %b)
1031  ret <vscale x 2 x i64> %out
1032}
1033
1034;
1035; UMULLB (Indexed)
1036;
1037
1038define <vscale x 4 x i32> @umullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1039; CHECK-LABEL: umullb_lane_h:
1040; CHECK:       // %bb.0:
1041; CHECK-NEXT:    umullb z0.s, z0.h, z1.h[0]
1042; CHECK-NEXT:    ret
1043  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16> %a,
1044                                                                       <vscale x 8 x i16> %b,
1045                                                                       i32 0)
1046  ret <vscale x 4 x i32> %out
1047}
1048
1049
1050define <vscale x 2 x i64> @umullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1051; CHECK-LABEL: umullb_lane_s:
1052; CHECK:       // %bb.0:
1053; CHECK-NEXT:    umullb z0.d, z0.s, z1.s[3]
1054; CHECK-NEXT:    ret
1055  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32> %a,
1056                                                                       <vscale x 4 x i32> %b,
1057                                                                       i32 3)
1058  ret <vscale x 2 x i64> %out
1059}
1060
1061;
1062; UMULLT (Vectors)
1063;
1064
1065define <vscale x 8 x i16> @umullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1066; CHECK-LABEL: umullt_b:
1067; CHECK:       // %bb.0:
1068; CHECK-NEXT:    umullt z0.h, z0.b, z1.b
1069; CHECK-NEXT:    ret
1070  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8> %a,
1071                                                                  <vscale x 16 x i8> %b)
1072  ret <vscale x 8 x i16> %out
1073}
1074
1075define <vscale x 4 x i32> @umullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1076; CHECK-LABEL: umullt_h:
1077; CHECK:       // %bb.0:
1078; CHECK-NEXT:    umullt z0.s, z0.h, z1.h
1079; CHECK-NEXT:    ret
1080  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16> %a,
1081                                                                  <vscale x 8 x i16> %b)
1082  ret <vscale x 4 x i32> %out
1083}
1084
1085define <vscale x 2 x i64> @umullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1086; CHECK-LABEL: umullt_s:
1087; CHECK:       // %bb.0:
1088; CHECK-NEXT:    umullt z0.d, z0.s, z1.s
1089; CHECK-NEXT:    ret
1090  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32> %a,
1091                                                                  <vscale x 4 x i32> %b)
1092  ret <vscale x 2 x i64> %out
1093}
1094
1095;
1096; UMULLT (Indexed)
1097;
1098
1099define <vscale x 4 x i32> @umullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1100; CHECK-LABEL: umullt_lane_h:
1101; CHECK:       // %bb.0:
1102; CHECK-NEXT:    umullt z0.s, z0.h, z1.h[1]
1103; CHECK-NEXT:    ret
1104  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16> %a,
1105                                                                       <vscale x 8 x i16> %b,
1106                                                                       i32 1)
1107  ret <vscale x 4 x i32> %out
1108}
1109
1110define <vscale x 2 x i64> @umullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1111; CHECK-LABEL: umullt_lane_s:
1112; CHECK:       // %bb.0:
1113; CHECK-NEXT:    umullt z0.d, z0.s, z1.s[2]
1114; CHECK-NEXT:    ret
1115  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32> %a,
1116                                                                       <vscale x 4 x i32> %b,
1117                                                                       i32 2)
1118  ret <vscale x 2 x i64> %out
1119}
1120
1121;
1122; USHLLB
1123;
1124
1125define <vscale x 8 x i16> @ushllb_b(<vscale x 16 x i8> %a) {
1126; CHECK-LABEL: ushllb_b:
1127; CHECK:       // %bb.0:
1128; CHECK-NEXT:    ushllb z0.h, z0.b, #6
1129; CHECK-NEXT:    ret
1130  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8> %a, i32 6)
1131  ret <vscale x 8 x i16> %out
1132}
1133
1134define <vscale x 4 x i32> @ushllb_h(<vscale x 8 x i16> %a) {
1135; CHECK-LABEL: ushllb_h:
1136; CHECK:       // %bb.0:
1137; CHECK-NEXT:    ushllb z0.s, z0.h, #7
1138; CHECK-NEXT:    ret
1139  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16> %a, i32 7)
1140  ret <vscale x 4 x i32> %out
1141}
1142
1143define <vscale x 2 x i64> @ushllb_s(<vscale x 4 x i32> %a) {
1144; CHECK-LABEL: ushllb_s:
1145; CHECK:       // %bb.0:
1146; CHECK-NEXT:    ushllb z0.d, z0.s, #8
1147; CHECK-NEXT:    ret
1148  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32> %a, i32 8)
1149  ret <vscale x 2 x i64> %out
1150}
1151
1152;
1153; USHLLT
1154;
1155
1156define <vscale x 8 x i16> @ushllt_b(<vscale x 16 x i8> %a) {
1157; CHECK-LABEL: ushllt_b:
1158; CHECK:       // %bb.0:
1159; CHECK-NEXT:    ushllt z0.h, z0.b, #7
1160; CHECK-NEXT:    ret
1161  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8> %a, i32 7)
1162  ret <vscale x 8 x i16> %out
1163}
1164
1165define <vscale x 4 x i32> @ushllt_h(<vscale x 8 x i16> %a) {
1166; CHECK-LABEL: ushllt_h:
1167; CHECK:       // %bb.0:
1168; CHECK-NEXT:    ushllt z0.s, z0.h, #15
1169; CHECK-NEXT:    ret
1170  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16> %a, i32 15)
1171  ret <vscale x 4 x i32> %out
1172}
1173
1174define <vscale x 2 x i64> @ushllt_s(<vscale x 4 x i32> %a) {
1175; CHECK-LABEL: ushllt_s:
1176; CHECK:       // %bb.0:
1177; CHECK-NEXT:    ushllt z0.d, z0.s, #31
1178; CHECK-NEXT:    ret
1179  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32> %a, i32 31)
1180  ret <vscale x 2 x i64> %out
1181}
1182
1183;
1184; USUBLB
1185;
1186
1187define <vscale x 8 x i16> @usublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1188; CHECK-LABEL: usublb_b:
1189; CHECK:       // %bb.0:
1190; CHECK-NEXT:    usublb z0.h, z0.b, z1.b
1191; CHECK-NEXT:    ret
1192  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8> %a,
1193                                                                  <vscale x 16 x i8> %b)
1194  ret <vscale x 8 x i16> %out
1195}
1196
1197define <vscale x 4 x i32> @usublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1198; CHECK-LABEL: usublb_h:
1199; CHECK:       // %bb.0:
1200; CHECK-NEXT:    usublb z0.s, z0.h, z1.h
1201; CHECK-NEXT:    ret
1202  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16> %a,
1203                                                                  <vscale x 8 x i16> %b)
1204  ret <vscale x 4 x i32> %out
1205}
1206
1207define <vscale x 2 x i64> @usublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1208; CHECK-LABEL: usublb_s:
1209; CHECK:       // %bb.0:
1210; CHECK-NEXT:    usublb z0.d, z0.s, z1.s
1211; CHECK-NEXT:    ret
1212  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32> %a,
1213                                                                  <vscale x 4 x i32> %b)
1214  ret <vscale x 2 x i64> %out
1215}
1216
1217;
1218; USUBLT
1219;
1220
1221define <vscale x 8 x i16> @usublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
1222; CHECK-LABEL: usublt_b:
1223; CHECK:       // %bb.0:
1224; CHECK-NEXT:    usublt z0.h, z0.b, z1.b
1225; CHECK-NEXT:    ret
1226  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8> %a,
1227                                                                  <vscale x 16 x i8> %b)
1228  ret <vscale x 8 x i16> %out
1229}
1230
1231define <vscale x 4 x i32> @usublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
1232; CHECK-LABEL: usublt_h:
1233; CHECK:       // %bb.0:
1234; CHECK-NEXT:    usublt z0.s, z0.h, z1.h
1235; CHECK-NEXT:    ret
1236  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16> %a,
1237                                                                  <vscale x 8 x i16> %b)
1238  ret <vscale x 4 x i32> %out
1239}
1240
1241define <vscale x 2 x i64> @usublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1242; CHECK-LABEL: usublt_s:
1243; CHECK:       // %bb.0:
1244; CHECK-NEXT:    usublt z0.d, z0.s, z1.s
1245; CHECK-NEXT:    ret
1246  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32> %a,
1247                                                                  <vscale x 4 x i32> %b)
1248  ret <vscale x 2 x i64> %out
1249}
1250
1251;
1252; USUBWB
1253;
1254
1255define <vscale x 8 x i16> @usubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
1256; CHECK-LABEL: usubwb_b:
1257; CHECK:       // %bb.0:
1258; CHECK-NEXT:    usubwb z0.h, z0.h, z1.b
1259; CHECK-NEXT:    ret
1260  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16> %a,
1261                                                                  <vscale x 16 x i8> %b)
1262  ret <vscale x 8 x i16> %out
1263}
1264
1265define <vscale x 4 x i32> @usubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
1266; CHECK-LABEL: usubwb_h:
1267; CHECK:       // %bb.0:
1268; CHECK-NEXT:    usubwb z0.s, z0.s, z1.h
1269; CHECK-NEXT:    ret
1270  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32> %a,
1271                                                                  <vscale x 8 x i16> %b)
1272  ret <vscale x 4 x i32> %out
1273}
1274
1275define <vscale x 2 x i64> @usubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
1276; CHECK-LABEL: usubwb_s:
1277; CHECK:       // %bb.0:
1278; CHECK-NEXT:    usubwb z0.d, z0.d, z1.s
1279; CHECK-NEXT:    ret
1280  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64> %a,
1281                                                                  <vscale x 4 x i32> %b)
1282  ret <vscale x 2 x i64> %out
1283}
1284
1285;
1286; USUBWT
1287;
1288
1289define <vscale x 8 x i16> @usubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
1290; CHECK-LABEL: usubwt_b:
1291; CHECK:       // %bb.0:
1292; CHECK-NEXT:    usubwt z0.h, z0.h, z1.b
1293; CHECK-NEXT:    ret
1294  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16> %a,
1295                                                                  <vscale x 16 x i8> %b)
1296  ret <vscale x 8 x i16> %out
1297}
1298
1299define <vscale x 4 x i32> @usubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
1300; CHECK-LABEL: usubwt_h:
1301; CHECK:       // %bb.0:
1302; CHECK-NEXT:    usubwt z0.s, z0.s, z1.h
1303; CHECK-NEXT:    ret
1304  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32> %a,
1305                                                                  <vscale x 8 x i16> %b)
1306  ret <vscale x 4 x i32> %out
1307}
1308
1309define <vscale x 2 x i64> @usubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
1310; CHECK-LABEL: usubwt_s:
1311; CHECK:       // %bb.0:
1312; CHECK-NEXT:    usubwt z0.d, z0.d, z1.s
1313; CHECK-NEXT:    ret
1314  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64> %a,
1315                                                                  <vscale x 4 x i32> %b)
1316  ret <vscale x 2 x i64> %out
1317}
1318
1319declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1320declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1321declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1322
1323declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1324declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1325declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1326
1327declare <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
1328declare <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
1329declare <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
1330
1331declare <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
1332declare <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
1333declare <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
1334
1335declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
1336declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
1337declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
1338
1339declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
1340declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
1341declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
1342
1343declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
1344declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
1345declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
1346
1347declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
1348declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
1349declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
1350
1351declare <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
1352declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
1353declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
1354
1355declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
1356declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1357
1358declare <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
1359declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
1360declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
1361
1362declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
1363declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1364
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
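;
; SQDMULLT (signed saturating doubling multiply long, top; vector and indexed-lane forms)
;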
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
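;
; SSHLLB (signed shift left long by immediate, bottom)
;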
declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32>, i32)
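;
; SSHLLT (signed shift left long by immediate, top)
;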
declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32>, i32)
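;
; SSUBLB (signed subtract long, bottom)
;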
declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
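;
; SSUBLT (signed subtract long, top)
;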
declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
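;
; SSUBWB (signed subtract wide, bottom)
;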
declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
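;
; SSUBWT (signed subtract wide, top)
;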
declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
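;
; UABALB (unsigned absolute difference and accumulate long, bottom)
;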
declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
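;
; UABALT (unsigned absolute difference and accumulate long, top)
;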
declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)
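;
; UABDLB (unsigned absolute difference long, bottom)
;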
declare <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
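;
; UABDLT (unsigned absolute difference long, top)
;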
declare <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
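;
; UADDLB (unsigned add long, bottom)
;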
declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
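;
; UADDLT (unsigned add long, top)
;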
declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
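;
; UADDWB (unsigned add wide, bottom)
;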
declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
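;
; UADDWT (unsigned add wide, top)
;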
declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
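;
; UMULLB (unsigned multiply long, bottom; vector and indexed-lane forms)
;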
declare <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
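;
; UMULLT (unsigned multiply long, top; vector and indexed-lane forms)
;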
declare <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
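;
; USHLLB (unsigned shift left long by immediate, bottom)
;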
declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32>, i32)
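;
; USHLLT (unsigned shift left long by immediate, top)
;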
declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32>, i32)
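;
; USUBLB (unsigned subtract long, bottom)
;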
declare <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
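;
; USUBLT (unsigned subtract long, top)
;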
declare <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)
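;
; USUBWB (unsigned subtract wide, bottom)
;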
declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)
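;
; USUBWT (unsigned subtract wide, top)
;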
declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)